tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

optical_flow.c (44384B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 #include <math.h>
     12 #include <limits.h>
     13 
     14 #include "config/aom_config.h"
     15 
     16 #include "aom_dsp/mathutils.h"
     17 #include "aom_mem/aom_mem.h"
     18 
     19 #include "av1/common/av1_common_int.h"
     20 #include "av1/encoder/encoder.h"
     21 #include "av1/encoder/optical_flow.h"
     22 #include "av1/encoder/sparse_linear_solver.h"
     23 #include "av1/encoder/reconinter_enc.h"
     24 
     25 #if CONFIG_OPTICAL_FLOW_API
     26 
     27 void av1_init_opfl_params(OPFL_PARAMS *opfl_params) {
     28  opfl_params->pyramid_levels = OPFL_PYRAMID_LEVELS;
     29  opfl_params->warping_steps = OPFL_WARPING_STEPS;
     30  opfl_params->lk_params = NULL;
     31 }
     32 
     33 void av1_init_lk_params(LK_PARAMS *lk_params) {
     34  lk_params->window_size = OPFL_WINDOW_SIZE;
     35 }
     36 
     37 // Helper function to determine whether a frame is encoded with high bit-depth.
     38 static inline int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
     39  return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
     40 }
     41 
     42 // Helper function to determine whether optical flow method is sparse.
     43 static inline int is_sparse(const OPFL_PARAMS *opfl_params) {
     44  return (opfl_params->flags & OPFL_FLAG_SPARSE) ? 1 : 0;
     45 }
     46 
     47 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
     48                                  const YV12_BUFFER_CONFIG *ref_frame,
     49                                  const double x_coord, const double y_coord,
     50                                  const int window_size, const int bit_depth,
     51                                  double *ix, double *iy, double *it,
     52                                  LOCALMV *mv);
     53 
     54 // coefficients for bilinear interpolation on unit square
     55 static int pixel_interp(const double x, const double y, const double b00,
     56                        const double b01, const double b10, const double b11) {
     57  const int xint = (int)x;
     58  const int yint = (int)y;
     59  const double xdec = x - xint;
     60  const double ydec = y - yint;
     61  const double a = (1 - xdec) * (1 - ydec);
     62  const double b = xdec * (1 - ydec);
     63  const double c = (1 - xdec) * ydec;
     64  const double d = xdec * ydec;
     65  // if x, y are already integers, this results to b00
     66  int interp = (int)round(a * b00 + b * b01 + c * b10 + d * b11);
     67  return interp;
     68 }
     69 
     70 // Scharr filter to compute spatial gradient
     71 static void spatial_gradient(const YV12_BUFFER_CONFIG *frame, const int x_coord,
     72                             const int y_coord, const int direction,
     73                             double *derivative) {
     74  double *filter;
     75  // Scharr filters
     76  double gx[9] = { -3, 0, 3, -10, 0, 10, -3, 0, 3 };
     77  double gy[9] = { -3, -10, -3, 0, 0, 0, 3, 10, 3 };
     78  if (direction == 0) {  // x direction
     79    filter = gx;
     80  } else {  // y direction
     81    filter = gy;
     82  }
     83  int idx = 0;
     84  double d = 0;
     85  for (int yy = -1; yy <= 1; yy++) {
     86    for (int xx = -1; xx <= 1; xx++) {
     87      d += filter[idx] *
     88           frame->y_buffer[(y_coord + yy) * frame->y_stride + (x_coord + xx)];
     89      idx++;
     90    }
     91  }
     92  // normalization scaling factor for scharr
     93  *derivative = d / 32.0;
     94 }
     95 
     96 // Determine the spatial gradient at subpixel locations
     97 // For example, when reducing images for pyramidal LK,
     98 // corners found in original image may be at subpixel locations.
     99 static void gradient_interp(double *fullpel_deriv, const double x_coord,
    100                            const double y_coord, const int w, const int h,
    101                            double *derivative) {
    102  const int xint = (int)x_coord;
    103  const int yint = (int)y_coord;
    104  double interp;
    105  if (xint + 1 > w - 1 || yint + 1 > h - 1) {
    106    interp = fullpel_deriv[yint * w + xint];
    107  } else {
    108    interp = pixel_interp(x_coord, y_coord, fullpel_deriv[yint * w + xint],
    109                          fullpel_deriv[yint * w + (xint + 1)],
    110                          fullpel_deriv[(yint + 1) * w + xint],
    111                          fullpel_deriv[(yint + 1) * w + (xint + 1)]);
    112  }
    113 
    114  *derivative = interp;
    115 }
    116 
    117 static void temporal_gradient(const YV12_BUFFER_CONFIG *frame,
    118                              const YV12_BUFFER_CONFIG *frame2,
    119                              const double x_coord, const double y_coord,
    120                              const int bit_depth, double *derivative,
    121                              LOCALMV *mv) {
    122  const int w = 2;
    123  const int h = 2;
    124  uint8_t pred1[4];
    125  uint8_t pred2[4];
    126 
    127  const int y = (int)y_coord;
    128  const int x = (int)x_coord;
    129  const double ydec = y_coord - y;
    130  const double xdec = x_coord - x;
    131  const int is_intrabc = 0;  // Is intra-copied?
    132  const int is_high_bitdepth = is_frame_high_bitdepth(frame2);
    133  const int subsampling_x = 0, subsampling_y = 0;  // for y-buffer
    134  const int_interpfilters interp_filters =
    135      av1_broadcast_interp_filter(MULTITAP_SHARP);
    136  const int plane = 0;  // y-plane
    137  const struct buf_2d ref_buf2 = { NULL, frame2->y_buffer, frame2->y_crop_width,
    138                                   frame2->y_crop_height, frame2->y_stride };
    139  struct scale_factors scale;
    140  av1_setup_scale_factors_for_frame(&scale, frame->y_crop_width,
    141                                    frame->y_crop_height, frame->y_crop_width,
    142                                    frame->y_crop_height);
    143  InterPredParams inter_pred_params;
    144  av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
    145                        subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
    146                        &scale, &ref_buf2, interp_filters);
    147  inter_pred_params.interp_filter_params[0] =
    148      &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
    149  inter_pred_params.interp_filter_params[1] =
    150      &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
    151  inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
    152  MV newmv = { .row = (int16_t)round((mv->row + xdec) * 8),
    153               .col = (int16_t)round((mv->col + ydec) * 8) };
    154  av1_enc_build_one_inter_predictor(pred2, w, &newmv, &inter_pred_params);
    155  const struct buf_2d ref_buf1 = { NULL, frame->y_buffer, frame->y_crop_width,
    156                                   frame->y_crop_height, frame->y_stride };
    157  av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
    158                        subsampling_y, bit_depth, is_high_bitdepth, is_intrabc,
    159                        &scale, &ref_buf1, interp_filters);
    160  inter_pred_params.interp_filter_params[0] =
    161      &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
    162  inter_pred_params.interp_filter_params[1] =
    163      &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
    164  inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
    165  MV zeroMV = { .row = (int16_t)round(xdec * 8),
    166                .col = (int16_t)round(ydec * 8) };
    167  av1_enc_build_one_inter_predictor(pred1, w, &zeroMV, &inter_pred_params);
    168 
    169  *derivative = pred2[0] - pred1[0];
    170 }
    171 
    172 // Numerical differentiate over window_size x window_size surrounding (x,y)
    173 // location. Alters ix, iy, it to contain numerical partial derivatives
    174 static void gradients_over_window(const YV12_BUFFER_CONFIG *frame,
    175                                  const YV12_BUFFER_CONFIG *ref_frame,
    176                                  const double x_coord, const double y_coord,
    177                                  const int window_size, const int bit_depth,
    178                                  double *ix, double *iy, double *it,
    179                                  LOCALMV *mv) {
    180  const double left = x_coord - window_size / 2.0;
    181  const double top = y_coord - window_size / 2.0;
    182  // gradient operators need pixel before and after (start at 1)
    183  const double x_start = AOMMAX(1, left);
    184  const double y_start = AOMMAX(1, top);
    185  const int frame_height = frame->y_crop_height;
    186  const int frame_width = frame->y_crop_width;
    187  double deriv_x;
    188  double deriv_y;
    189  double deriv_t;
    190 
    191  const double x_end = AOMMIN(x_coord + window_size / 2.0, frame_width - 2);
    192  const double y_end = AOMMIN(y_coord + window_size / 2.0, frame_height - 2);
    193  const int xs = (int)AOMMAX(1, x_start - 1);
    194  const int ys = (int)AOMMAX(1, y_start - 1);
    195  const int xe = (int)AOMMIN(x_end + 2, frame_width - 2);
    196  const int ye = (int)AOMMIN(y_end + 2, frame_height - 2);
    197  // with normalization, gradients may be double values
    198  double *fullpel_dx = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_x));
    199  double *fullpel_dy = aom_malloc((ye - ys) * (xe - xs) * sizeof(deriv_y));
    200  if (!fullpel_dx || !fullpel_dy) {
    201    aom_free(fullpel_dx);
    202    aom_free(fullpel_dy);
    203    return;
    204  }
    205 
    206  // TODO(any): This could be more efficient in the case that x_coord
    207  // and y_coord are integers.. but it may look more messy.
    208 
    209  // calculate spatial gradients at full pixel locations
    210  for (int j = ys; j < ye; j++) {
    211    for (int i = xs; i < xe; i++) {
    212      spatial_gradient(frame, i, j, 0, &deriv_x);
    213      spatial_gradient(frame, i, j, 1, &deriv_y);
    214      int idx = (j - ys) * (xe - xs) + (i - xs);
    215      fullpel_dx[idx] = deriv_x;
    216      fullpel_dy[idx] = deriv_y;
    217    }
    218  }
    219  // compute numerical differentiation for every pixel in window
    220  // (this potentially includes subpixels)
    221  for (double j = y_start; j < y_end; j++) {
    222    for (double i = x_start; i < x_end; i++) {
    223      temporal_gradient(frame, ref_frame, i, j, bit_depth, &deriv_t, mv);
    224      gradient_interp(fullpel_dx, i - xs, j - ys, xe - xs, ye - ys, &deriv_x);
    225      gradient_interp(fullpel_dy, i - xs, j - ys, xe - xs, ye - ys, &deriv_y);
    226      int idx = (int)(j - top) * window_size + (int)(i - left);
    227      ix[idx] = deriv_x;
    228      iy[idx] = deriv_y;
    229      it[idx] = deriv_t;
    230    }
    231  }
    232  // TODO(any): to avoid setting deriv arrays to zero for every iteration,
    233  // could instead pass these two values back through function call
    234  // int first_idx = (int)(y_start - top) * window_size + (int)(x_start - left);
    235  // int width = window_size - ((int)(x_start - left) + (int)(left + window_size
    236  // - x_end));
    237 
    238  aom_free(fullpel_dx);
    239  aom_free(fullpel_dy);
    240 }
    241 
    242 // To compute eigenvalues of 2x2 matrix: Solve for lambda where
    243 // Determinant(matrix - lambda*identity) == 0
    244 static void eigenvalues_2x2(const double *matrix, double *eig) {
    245  const double a = 1;
    246  const double b = -1 * matrix[0] - matrix[3];
    247  const double c = -1 * matrix[1] * matrix[2] + matrix[0] * matrix[3];
    248  // quadratic formula
    249  const double discriminant = b * b - 4 * a * c;
    250  eig[0] = (-b - sqrt(discriminant)) / (2.0 * a);
    251  eig[1] = (-b + sqrt(discriminant)) / (2.0 * a);
    252  // double check that eigenvalues are ordered by magnitude
    253  if (fabs(eig[0]) > fabs(eig[1])) {
    254    double tmp = eig[0];
    255    eig[0] = eig[1];
    256    eig[1] = tmp;
    257  }
    258 }
    259 
    260 // Shi-Tomasi corner detection criteria
    261 static double corner_score(const YV12_BUFFER_CONFIG *frame_to_filter,
    262                           const YV12_BUFFER_CONFIG *ref_frame, const int x,
    263                           const int y, double *i_x, double *i_y, double *i_t,
    264                           const int n, const int bit_depth) {
    265  double eig[2];
    266  LOCALMV mv = { .row = 0, .col = 0 };
    267  // TODO(any): technically, ref_frame and i_t are not used by corner score
    268  // so these could be replaced by dummy variables,
    269  // or change this to spatial gradient function over window only
    270  gradients_over_window(frame_to_filter, ref_frame, x, y, n, bit_depth, i_x,
    271                        i_y, i_t, &mv);
    272  double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 };
    273  multiply_mat(i_x, i_x, Mres1, 1, n * n, 1);
    274  multiply_mat(i_x, i_y, Mres2, 1, n * n, 1);
    275  multiply_mat(i_y, i_y, Mres3, 1, n * n, 1);
    276  double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] };
    277  eigenvalues_2x2(M, eig);
    278  return fabs(eig[0]);
    279 }
    280 
    281 // Finds corners in frame_to_filter
    282 // For less strict requirements (i.e. more corners), decrease threshold
    283 static int detect_corners(const YV12_BUFFER_CONFIG *frame_to_filter,
    284                          const YV12_BUFFER_CONFIG *ref_frame,
    285                          const int maxcorners, int *ref_corners,
    286                          const int bit_depth) {
    287  const int frame_height = frame_to_filter->y_crop_height;
    288  const int frame_width = frame_to_filter->y_crop_width;
    289  // TODO(any): currently if maxcorners is decreased, then it only means
    290  // corners will be omited from bottom-right of image. if maxcorners
    291  // is actually used, then this algorithm would need to re-iterate
    292  // and choose threshold based on that
    293  assert(maxcorners == frame_height * frame_width);
    294  int countcorners = 0;
    295  const double threshold = 0.1;
    296  double score;
    297  const int n = 3;
    298  double i_x[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    299  double i_y[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    300  double i_t[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    301  const int fromedge = n;
    302  double max_score = corner_score(frame_to_filter, ref_frame, fromedge,
    303                                  fromedge, i_x, i_y, i_t, n, bit_depth);
    304  // rough estimate of max corner score in image
    305  for (int x = fromedge; x < frame_width - fromedge; x += 1) {
    306    for (int y = fromedge; y < frame_height - fromedge; y += frame_height / 5) {
    307      for (int i = 0; i < n * n; i++) {
    308        i_x[i] = 0;
    309        i_y[i] = 0;
    310        i_t[i] = 0;
    311      }
    312      score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
    313                           bit_depth);
    314      if (score > max_score) {
    315        max_score = score;
    316      }
    317    }
    318  }
    319  // score all the points and choose corners over threshold
    320  for (int x = fromedge; x < frame_width - fromedge; x += 1) {
    321    for (int y = fromedge;
    322         (y < frame_height - fromedge) && countcorners < maxcorners; y += 1) {
    323      for (int i = 0; i < n * n; i++) {
    324        i_x[i] = 0;
    325        i_y[i] = 0;
    326        i_t[i] = 0;
    327      }
    328      score = corner_score(frame_to_filter, ref_frame, x, y, i_x, i_y, i_t, n,
    329                           bit_depth);
    330      if (score > threshold * max_score) {
    331        ref_corners[countcorners * 2] = x;
    332        ref_corners[countcorners * 2 + 1] = y;
    333        countcorners++;
    334      }
    335    }
    336  }
    337  return countcorners;
    338 }
    339 
    340 // weights is an nxn matrix. weights is filled with a gaussian function,
    341 // with independent variable: distance from the center point.
    342 static void gaussian(const double sigma, const int n, const int normalize,
    343                     double *weights) {
    344  double total_weight = 0;
    345  for (int j = 0; j < n; j++) {
    346    for (int i = 0; i < n; i++) {
    347      double distance = sqrt(pow(n / 2 - i, 2) + pow(n / 2 - j, 2));
    348      double weight = exp(-0.5 * pow(distance / sigma, 2));
    349      weights[j * n + i] = weight;
    350      total_weight += weight;
    351    }
    352  }
    353  if (normalize == 1) {
    354    for (int j = 0; j < n; j++) {
    355      weights[j] = weights[j] / total_weight;
    356    }
    357  }
    358 }
    359 
    360 static double convolve(const double *filter, const int *img, const int size) {
    361  double result = 0;
    362  for (int i = 0; i < size; i++) {
    363    result += filter[i] * img[i];
    364  }
    365  return result;
    366 }
    367 
    368 // Applies a Gaussian low-pass smoothing filter to produce
    369 // a corresponding lower resolution image with halved dimensions
    370 static void reduce(uint8_t *img, int height, int width, int stride,
    371                   uint8_t *reduced_img) {
    372  const int new_width = width / 2;
    373  const int window_size = 5;
    374  const double gaussian_filter[25] = {
    375    1. / 256, 1.0 / 64, 3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
    376    3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
    377    3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
    378    1. / 64,  3. / 128, 1. / 64,  1. / 256
    379  };
    380  // filter is 5x5 so need prev and forward 2 pixels
    381  int img_section[25];
    382  for (int y = 0; y < height - 1; y += 2) {
    383    for (int x = 0; x < width - 1; x += 2) {
    384      int i = 0;
    385      for (int yy = y - window_size / 2; yy <= y + window_size / 2; yy++) {
    386        for (int xx = x - window_size / 2; xx <= x + window_size / 2; xx++) {
    387          int yvalue = yy;
    388          int xvalue = xx;
    389          // copied pixels outside the boundary
    390          if (yvalue < 0) yvalue = 0;
    391          if (xvalue < 0) xvalue = 0;
    392          if (yvalue >= height) yvalue = height - 1;
    393          if (xvalue >= width) xvalue = width - 1;
    394          img_section[i++] = img[yvalue * stride + xvalue];
    395        }
    396      }
    397      reduced_img[(y / 2) * new_width + (x / 2)] = (uint8_t)convolve(
    398          gaussian_filter, img_section, window_size * window_size);
    399    }
    400  }
    401 }
    402 
    403 static int cmpfunc(const void *a, const void *b) {
    404  return (*(int *)a - *(int *)b);
    405 }
    406 static void filter_mvs(const MV_FILTER_TYPE mv_filter, const int frame_height,
    407                       const int frame_width, LOCALMV *localmvs, MV *mvs) {
    408  const int n = 5;  // window size
    409  // for smoothing filter
    410  const double gaussian_filter[25] = {
    411    1. / 256, 1. / 64,  3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
    412    3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
    413    3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
    414    1. / 64,  3. / 128, 1. / 64,  1. / 256
    415  };
    416  // for median filter
    417  int mvrows[25];
    418  int mvcols[25];
    419  if (mv_filter != MV_FILTER_NONE) {
    420    for (int y = 0; y < frame_height; y++) {
    421      for (int x = 0; x < frame_width; x++) {
    422        int center_idx = y * frame_width + x;
    423        int i = 0;
    424        double filtered_row = 0;
    425        double filtered_col = 0;
    426        for (int yy = y - n / 2; yy <= y + n / 2; yy++) {
    427          for (int xx = x - n / 2; xx <= x + n / 2; xx++) {
    428            int yvalue = yy;
    429            int xvalue = xx;
    430            // copied pixels outside the boundary
    431            if (yvalue < 0) yvalue = 0;
    432            if (xvalue < 0) xvalue = 0;
    433            if (yvalue >= frame_height) yvalue = frame_height - 1;
    434            if (xvalue >= frame_width) xvalue = frame_width - 1;
    435            int index = yvalue * frame_width + xvalue;
    436            if (mv_filter == MV_FILTER_SMOOTH) {
    437              filtered_row += mvs[index].row * gaussian_filter[i];
    438              filtered_col += mvs[index].col * gaussian_filter[i];
    439            } else if (mv_filter == MV_FILTER_MEDIAN) {
    440              mvrows[i] = mvs[index].row;
    441              mvcols[i] = mvs[index].col;
    442            }
    443            i++;
    444          }
    445        }
    446 
    447        MV mv = mvs[center_idx];
    448        if (mv_filter == MV_FILTER_SMOOTH) {
    449          mv.row = (int16_t)filtered_row;
    450          mv.col = (int16_t)filtered_col;
    451        } else if (mv_filter == MV_FILTER_MEDIAN) {
    452          qsort(mvrows, 25, sizeof(mv.row), cmpfunc);
    453          qsort(mvcols, 25, sizeof(mv.col), cmpfunc);
    454          mv.row = mvrows[25 / 2];
    455          mv.col = mvcols[25 / 2];
    456        }
    457        LOCALMV localmv = { .row = ((double)mv.row) / 8,
    458                            .col = ((double)mv.row) / 8 };
    459        localmvs[y * frame_width + x] = localmv;
    460        // if mvs array is immediately updated here, then the result may
    461        // propagate to other pixels.
    462      }
    463    }
    464    for (int i = 0; i < frame_height * frame_width; i++) {
    465      MV mv = { .row = (int16_t)round(8 * localmvs[i].row),
    466                .col = (int16_t)round(8 * localmvs[i].col) };
    467      mvs[i] = mv;
    468    }
    469  }
    470 }
    471 
    472 // Computes optical flow at a single pyramid level,
    473 // using Lucas-Kanade algorithm.
    474 // Modifies mvs array.
    475 static void lucas_kanade(const YV12_BUFFER_CONFIG *from_frame,
    476                         const YV12_BUFFER_CONFIG *to_frame, const int level,
    477                         const LK_PARAMS *lk_params, const int num_ref_corners,
    478                         int *ref_corners, const int mv_stride,
    479                         const int bit_depth, LOCALMV *mvs) {
    480  assert(lk_params->window_size > 0 && lk_params->window_size % 2 == 0);
    481  const int n = lk_params->window_size;
    482  // algorithm is sensitive to window size
    483  double *i_x = (double *)aom_malloc(n * n * sizeof(*i_x));
    484  double *i_y = (double *)aom_malloc(n * n * sizeof(*i_y));
    485  double *i_t = (double *)aom_malloc(n * n * sizeof(*i_t));
    486  double *weights = (double *)aom_malloc(n * n * sizeof(*weights));
    487  if (!i_x || !i_y || !i_t || !weights) goto free_lk_buf;
    488 
    489  const int expand_multiplier = (int)pow(2, level);
    490  double sigma = 0.2 * n;
    491  // normalizing doesn't really affect anything since it's applied
    492  // to every component of M and b
    493  gaussian(sigma, n, 0, weights);
    494  for (int i = 0; i < num_ref_corners; i++) {
    495    const double x_coord = 1.0 * ref_corners[i * 2] / expand_multiplier;
    496    const double y_coord = 1.0 * ref_corners[i * 2 + 1] / expand_multiplier;
    497    int highres_x = ref_corners[i * 2];
    498    int highres_y = ref_corners[i * 2 + 1];
    499    int mv_idx = highres_y * (mv_stride) + highres_x;
    500    LOCALMV mv_old = mvs[mv_idx];
    501    mv_old.row = mv_old.row / expand_multiplier;
    502    mv_old.col = mv_old.col / expand_multiplier;
    503    // using this instead of memset, since it's not completely
    504    // clear if zero memset works on double arrays
    505    for (int j = 0; j < n * n; j++) {
    506      i_x[j] = 0;
    507      i_y[j] = 0;
    508      i_t[j] = 0;
    509    }
    510    gradients_over_window(from_frame, to_frame, x_coord, y_coord, n, bit_depth,
    511                          i_x, i_y, i_t, &mv_old);
    512    double Mres1[1] = { 0 }, Mres2[1] = { 0 }, Mres3[1] = { 0 };
    513    double bres1[1] = { 0 }, bres2[1] = { 0 };
    514    for (int j = 0; j < n * n; j++) {
    515      Mres1[0] += weights[j] * i_x[j] * i_x[j];
    516      Mres2[0] += weights[j] * i_x[j] * i_y[j];
    517      Mres3[0] += weights[j] * i_y[j] * i_y[j];
    518      bres1[0] += weights[j] * i_x[j] * i_t[j];
    519      bres2[0] += weights[j] * i_y[j] * i_t[j];
    520    }
    521    double M[4] = { Mres1[0], Mres2[0], Mres2[0], Mres3[0] };
    522    double b[2] = { -1 * bres1[0], -1 * bres2[0] };
    523    double eig[2] = { 1, 1 };
    524    eigenvalues_2x2(M, eig);
    525    double threshold = 0.1;
    526    if (fabs(eig[0]) > threshold) {
    527      // if M is not invertible, then displacement
    528      // will default to zeros
    529      double u[2] = { 0, 0 };
    530      linsolve(2, M, 2, b, u);
    531      int mult = 1;
    532      if (level != 0)
    533        mult = expand_multiplier;  // mv doubles when resolution doubles
    534      LOCALMV mv = { .row = (mult * (u[0] + mv_old.row)),
    535                     .col = (mult * (u[1] + mv_old.col)) };
    536      mvs[mv_idx] = mv;
    537      mvs[mv_idx] = mv;
    538    }
    539  }
    540 free_lk_buf:
    541  aom_free(weights);
    542  aom_free(i_t);
    543  aom_free(i_x);
    544  aom_free(i_y);
    545 }
    546 
    547 // Warp the src_frame to warper_frame according to mvs.
    548 // mvs point to src_frame
    549 static void warp_back_frame(YV12_BUFFER_CONFIG *warped_frame,
    550                            const YV12_BUFFER_CONFIG *src_frame,
    551                            const LOCALMV *mvs, int mv_stride) {
    552  int w, h;
    553  const int fw = src_frame->y_crop_width;
    554  const int fh = src_frame->y_crop_height;
    555  const int src_fs = src_frame->y_stride, warped_fs = warped_frame->y_stride;
    556  const uint8_t *src_buf = src_frame->y_buffer;
    557  uint8_t *warped_buf = warped_frame->y_buffer;
    558  double temp;
    559  for (h = 0; h < fh; h++) {
    560    for (w = 0; w < fw; w++) {
    561      double cord_x = (double)w + mvs[h * mv_stride + w].col;
    562      double cord_y = (double)h + mvs[h * mv_stride + w].row;
    563      cord_x = fclamp(cord_x, 0, (double)(fw - 1));
    564      cord_y = fclamp(cord_y, 0, (double)(fh - 1));
    565      const int floorx = (int)floor(cord_x);
    566      const int floory = (int)floor(cord_y);
    567      const double fracx = cord_x - (double)floorx;
    568      const double fracy = cord_y - (double)floory;
    569 
    570      temp = 0;
    571      for (int hh = 0; hh < 2; hh++) {
    572        const double weighth = hh ? (fracy) : (1 - fracy);
    573        for (int ww = 0; ww < 2; ww++) {
    574          const double weightw = ww ? (fracx) : (1 - fracx);
    575          int y = floory + hh;
    576          int x = floorx + ww;
    577          y = clamp(y, 0, fh - 1);
    578          x = clamp(x, 0, fw - 1);
    579          temp += (double)src_buf[y * src_fs + x] * weightw * weighth;
    580        }
    581      }
    582      warped_buf[h * warped_fs + w] = (uint8_t)round(temp);
    583    }
    584  }
    585 }
    586 
    587 // Same as warp_back_frame, but using a better interpolation filter.
    588 static void warp_back_frame_intp(YV12_BUFFER_CONFIG *warped_frame,
    589                                 const YV12_BUFFER_CONFIG *src_frame,
    590                                 const LOCALMV *mvs, int mv_stride) {
    591  int w, h;
    592  const int fw = src_frame->y_crop_width;
    593  const int fh = src_frame->y_crop_height;
    594  const int warped_fs = warped_frame->y_stride;
    595  uint8_t *warped_buf = warped_frame->y_buffer;
    596  const int blk = 2;
    597  uint8_t temp_blk[4];
    598 
    599  const int is_intrabc = 0;  // Is intra-copied?
    600  const int is_high_bitdepth = is_frame_high_bitdepth(src_frame);
    601  const int subsampling_x = 0, subsampling_y = 0;  // for y-buffer
    602  const int_interpfilters interp_filters =
    603      av1_broadcast_interp_filter(MULTITAP_SHARP2);
    604  const int plane = 0;  // y-plane
    605  const struct buf_2d ref_buf2 = { NULL, src_frame->y_buffer,
    606                                   src_frame->y_crop_width,
    607                                   src_frame->y_crop_height,
    608                                   src_frame->y_stride };
    609  const int bit_depth = src_frame->bit_depth;
    610  struct scale_factors scale;
    611  av1_setup_scale_factors_for_frame(
    612      &scale, src_frame->y_crop_width, src_frame->y_crop_height,
    613      src_frame->y_crop_width, src_frame->y_crop_height);
    614 
    615  for (h = 0; h < fh; h++) {
    616    for (w = 0; w < fw; w++) {
    617      InterPredParams inter_pred_params;
    618      av1_init_inter_params(&inter_pred_params, blk, blk, h, w, subsampling_x,
    619                            subsampling_y, bit_depth, is_high_bitdepth,
    620                            is_intrabc, &scale, &ref_buf2, interp_filters);
    621      inter_pred_params.interp_filter_params[0] =
    622          &av1_interp_filter_params_list[interp_filters.as_filters.x_filter];
    623      inter_pred_params.interp_filter_params[1] =
    624          &av1_interp_filter_params_list[interp_filters.as_filters.y_filter];
    625      inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
    626      MV newmv = { .row = (int16_t)round((mvs[h * mv_stride + w].row) * 8),
    627                   .col = (int16_t)round((mvs[h * mv_stride + w].col) * 8) };
    628      av1_enc_build_one_inter_predictor(temp_blk, blk, &newmv,
    629                                        &inter_pred_params);
    630      warped_buf[h * warped_fs + w] = temp_blk[0];
    631    }
    632  }
    633 }
    634 
    635 #define DERIVATIVE_FILTER_LENGTH 7
    636 double filter[DERIVATIVE_FILTER_LENGTH] = { -1.0 / 60, 9.0 / 60,  -45.0 / 60, 0,
    637                                            45.0 / 60, -9.0 / 60, 1.0 / 60 };
    638 
    639 // Get gradient of the whole frame
    640 static void get_frame_gradients(const YV12_BUFFER_CONFIG *from_frame,
    641                                const YV12_BUFFER_CONFIG *to_frame, double *ix,
    642                                double *iy, double *it, int grad_stride) {
    643  int w, h, k, idx;
    644  const int fw = from_frame->y_crop_width;
    645  const int fh = from_frame->y_crop_height;
    646  const int from_fs = from_frame->y_stride, to_fs = to_frame->y_stride;
    647  const uint8_t *from_buf = from_frame->y_buffer;
    648  const uint8_t *to_buf = to_frame->y_buffer;
    649 
    650  const int lh = DERIVATIVE_FILTER_LENGTH;
    651  const int hleft = (lh - 1) / 2;
    652 
    653  for (h = 0; h < fh; h++) {
    654    for (w = 0; w < fw; w++) {
    655      // x
    656      ix[h * grad_stride + w] = 0;
    657      for (k = 0; k < lh; k++) {
    658        // if we want to make this block dependent, need to extend the
    659        // boundaries using other initializations.
    660        idx = w + k - hleft;
    661        idx = clamp(idx, 0, fw - 1);
    662        ix[h * grad_stride + w] += filter[k] * 0.5 *
    663                                   ((double)from_buf[h * from_fs + idx] +
    664                                    (double)to_buf[h * to_fs + idx]);
    665      }
    666      // y
    667      iy[h * grad_stride + w] = 0;
    668      for (k = 0; k < lh; k++) {
    669        // if we want to make this block dependent, need to extend the
    670        // boundaries using other initializations.
    671        idx = h + k - hleft;
    672        idx = clamp(idx, 0, fh - 1);
    673        iy[h * grad_stride + w] += filter[k] * 0.5 *
    674                                   ((double)from_buf[idx * from_fs + w] +
    675                                    (double)to_buf[idx * to_fs + w]);
    676      }
    677      // t
    678      it[h * grad_stride + w] =
    679          (double)to_buf[h * to_fs + w] - (double)from_buf[h * from_fs + w];
    680    }
    681  }
    682 }
    683 
    684 // Solve for linear equations given by the H-S method
    685 static void solve_horn_schunck(const double *ix, const double *iy,
    686                               const double *it, int grad_stride, int width,
    687                               int height, const LOCALMV *init_mvs,
    688                               int init_mv_stride, LOCALMV *mvs,
    689                               int mv_stride) {
    690  // TODO(bohanli): May just need to allocate the buffers once per optical flow
    691  // calculation
    692  int *row_pos = aom_calloc(width * height * 28, sizeof(*row_pos));
    693  int *col_pos = aom_calloc(width * height * 28, sizeof(*col_pos));
    694  double *values = aom_calloc(width * height * 28, sizeof(*values));
    695  double *mv_vec = aom_calloc(width * height * 2, sizeof(*mv_vec));
    696  double *mv_init_vec = aom_calloc(width * height * 2, sizeof(*mv_init_vec));
    697  double *temp_b = aom_calloc(width * height * 2, sizeof(*temp_b));
    698  double *b = aom_calloc(width * height * 2, sizeof(*b));
    699  if (!row_pos || !col_pos || !values || !mv_vec || !mv_init_vec || !temp_b ||
    700      !b) {
    701    goto free_hs_solver_buf;
    702  }
    703 
    704  // the location idx for neighboring pixels, k < 4 are the 4 direct neighbors
    705  const int check_locs_y[12] = { 0, 0, -1, 1, -1, -1, 1, 1, 0, 0, -2, 2 };
    706  const int check_locs_x[12] = { -1, 1, 0, 0, -1, 1, -1, 1, -2, 2, 0, 0 };
    707 
    708  int h, w, checkh, checkw, k, ret;
    709  const int offset = height * width;
    710  SPARSE_MTX A;
    711  int c = 0;
    712  const double lambda = 100;
    713 
    714  for (w = 0; w < width; w++) {
    715    for (h = 0; h < height; h++) {
    716      mv_init_vec[w * height + h] = init_mvs[h * init_mv_stride + w].col;
    717      mv_init_vec[w * height + h + offset] =
    718          init_mvs[h * init_mv_stride + w].row;
    719    }
    720  }
    721 
    722  // get matrix A
    723  for (w = 0; w < width; w++) {
    724    for (h = 0; h < height; h++) {
    725      int center_num_direct = 4;
    726      const int center_idx = w * height + h;
    727      if (w == 0 || w == width - 1) center_num_direct--;
    728      if (h == 0 || h == height - 1) center_num_direct--;
    729      // diagonal entry for this row from the center pixel
    730      double cor_w = center_num_direct * center_num_direct + center_num_direct;
    731      row_pos[c] = center_idx;
    732      col_pos[c] = center_idx;
    733      values[c] = lambda * cor_w;
    734      c++;
    735      row_pos[c] = center_idx + offset;
    736      col_pos[c] = center_idx + offset;
    737      values[c] = lambda * cor_w;
    738      c++;
    739      // other entries from direct neighbors
    740      for (k = 0; k < 4; k++) {
    741        checkh = h + check_locs_y[k];
    742        checkw = w + check_locs_x[k];
    743        if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
    744          continue;
    745        }
    746        int this_idx = checkw * height + checkh;
    747        int this_num_direct = 4;
    748        if (checkw == 0 || checkw == width - 1) this_num_direct--;
    749        if (checkh == 0 || checkh == height - 1) this_num_direct--;
    750        cor_w = -center_num_direct - this_num_direct;
    751        row_pos[c] = center_idx;
    752        col_pos[c] = this_idx;
    753        values[c] = lambda * cor_w;
    754        c++;
    755        row_pos[c] = center_idx + offset;
    756        col_pos[c] = this_idx + offset;
    757        values[c] = lambda * cor_w;
    758        c++;
    759      }
    760      // entries from neighbors on the diagonal corners
    761      for (k = 4; k < 8; k++) {
    762        checkh = h + check_locs_y[k];
    763        checkw = w + check_locs_x[k];
    764        if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
    765          continue;
    766        }
    767        int this_idx = checkw * height + checkh;
    768        cor_w = 2;
    769        row_pos[c] = center_idx;
    770        col_pos[c] = this_idx;
    771        values[c] = lambda * cor_w;
    772        c++;
    773        row_pos[c] = center_idx + offset;
    774        col_pos[c] = this_idx + offset;
    775        values[c] = lambda * cor_w;
    776        c++;
    777      }
    778      // entries from neighbors with dist of 2
    779      for (k = 8; k < 12; k++) {
    780        checkh = h + check_locs_y[k];
    781        checkw = w + check_locs_x[k];
    782        if (checkh < 0 || checkh >= height || checkw < 0 || checkw >= width) {
    783          continue;
    784        }
    785        int this_idx = checkw * height + checkh;
    786        cor_w = 1;
    787        row_pos[c] = center_idx;
    788        col_pos[c] = this_idx;
    789        values[c] = lambda * cor_w;
    790        c++;
    791        row_pos[c] = center_idx + offset;
    792        col_pos[c] = this_idx + offset;
    793        values[c] = lambda * cor_w;
    794        c++;
    795      }
    796    }
    797  }
    798  ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
    799                            2 * width * height, &A);
    800  if (ret < 0) goto free_hs_solver_buf;
    801  // subtract init mv part from b
    802  av1_mtx_vect_multi_left(&A, mv_init_vec, temp_b, 2 * width * height);
    803  for (int i = 0; i < 2 * width * height; i++) {
    804    b[i] = -temp_b[i];
    805  }
    806  av1_free_sparse_mtx_elems(&A);
    807 
    808  // add cross terms to A and modify b with ExEt / EyEt
    809  for (w = 0; w < width; w++) {
    810    for (h = 0; h < height; h++) {
    811      int curidx = w * height + h;
    812      // modify b
    813      b[curidx] += -ix[h * grad_stride + w] * it[h * grad_stride + w];
    814      b[curidx + offset] += -iy[h * grad_stride + w] * it[h * grad_stride + w];
    815      // add cross terms to A
    816      row_pos[c] = curidx;
    817      col_pos[c] = curidx + offset;
    818      values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w];
    819      c++;
    820      row_pos[c] = curidx + offset;
    821      col_pos[c] = curidx;
    822      values[c] = ix[h * grad_stride + w] * iy[h * grad_stride + w];
    823      c++;
    824    }
    825  }
    826  // Add diagonal terms to A
    827  for (int i = 0; i < c; i++) {
    828    if (row_pos[i] == col_pos[i]) {
    829      if (row_pos[i] < offset) {
    830        w = row_pos[i] / height;
    831        h = row_pos[i] % height;
    832        values[i] += pow(ix[h * grad_stride + w], 2);
    833      } else {
    834        w = (row_pos[i] - offset) / height;
    835        h = (row_pos[i] - offset) % height;
    836        values[i] += pow(iy[h * grad_stride + w], 2);
    837      }
    838    }
    839  }
    840 
    841  ret = av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
    842                            2 * width * height, &A);
    843  if (ret < 0) goto free_hs_solver_buf;
    844 
    845  // solve for the mvs
    846  ret = av1_conjugate_gradient_sparse(&A, b, 2 * width * height, mv_vec);
    847  if (ret < 0) goto free_hs_solver_buf;
    848 
    849  // copy mvs
    850  for (w = 0; w < width; w++) {
    851    for (h = 0; h < height; h++) {
    852      mvs[h * mv_stride + w].col = mv_vec[w * height + h];
    853      mvs[h * mv_stride + w].row = mv_vec[w * height + h + offset];
    854    }
    855  }
    856 free_hs_solver_buf:
    857  aom_free(row_pos);
    858  aom_free(col_pos);
    859  aom_free(values);
    860  aom_free(mv_vec);
    861  aom_free(mv_init_vec);
    862  aom_free(b);
    863  aom_free(temp_b);
    864  av1_free_sparse_mtx_elems(&A);
    865 }
    866 
    867 // Calculate optical flow from from_frame to to_frame using the H-S method.
    868 static void horn_schunck(const YV12_BUFFER_CONFIG *from_frame,
    869                         const YV12_BUFFER_CONFIG *to_frame, const int level,
    870                         const int mv_stride, const int mv_height,
    871                         const int mv_width, const OPFL_PARAMS *opfl_params,
    872                         LOCALMV *mvs) {
    873  // mvs are always on level 0, here we define two new mv arrays that is of size
    874  // of this level.
    875  const int fw = from_frame->y_crop_width;
    876  const int fh = from_frame->y_crop_height;
    877  const int factor = (int)pow(2, level);
    878  int w, h, k, init_mv_stride;
    879  LOCALMV *init_mvs = NULL, *refine_mvs = NULL;
    880  double *ix = NULL, *iy = NULL, *it = NULL;
    881  YV12_BUFFER_CONFIG temp_frame;
    882  temp_frame.y_buffer = NULL;
    883  if (level == 0) {
    884    init_mvs = mvs;
    885    init_mv_stride = mv_stride;
    886  } else {
    887    init_mvs = aom_calloc(fw * fh, sizeof(*mvs));
    888    if (!init_mvs) goto free_hs_buf;
    889    init_mv_stride = fw;
    890    for (h = 0; h < fh; h++) {
    891      for (w = 0; w < fw; w++) {
    892        init_mvs[h * init_mv_stride + w].row =
    893            mvs[h * factor * mv_stride + w * factor].row / (double)factor;
    894        init_mvs[h * init_mv_stride + w].col =
    895            mvs[h * factor * mv_stride + w * factor].col / (double)factor;
    896      }
    897    }
    898  }
    899  refine_mvs = aom_calloc(fw * fh, sizeof(*mvs));
    900  if (!refine_mvs) goto free_hs_buf;
    901  // temp frame for warping
    902  temp_frame.y_buffer =
    903      (uint8_t *)aom_calloc(fh * fw, sizeof(*temp_frame.y_buffer));
    904  if (!temp_frame.y_buffer) goto free_hs_buf;
    905  temp_frame.y_crop_height = fh;
    906  temp_frame.y_crop_width = fw;
    907  temp_frame.y_stride = fw;
    908  // gradient buffers
    909  ix = aom_calloc(fw * fh, sizeof(*ix));
    910  iy = aom_calloc(fw * fh, sizeof(*iy));
    911  it = aom_calloc(fw * fh, sizeof(*it));
    912  if (!ix || !iy || !it) goto free_hs_buf;
    913  // For each warping step
    914  for (k = 0; k < opfl_params->warping_steps; k++) {
    915    // warp from_frame with init_mv
    916    if (level == 0) {
    917      warp_back_frame_intp(&temp_frame, to_frame, init_mvs, init_mv_stride);
    918    } else {
    919      warp_back_frame(&temp_frame, to_frame, init_mvs, init_mv_stride);
    920    }
    921    // calculate frame gradients
    922    get_frame_gradients(from_frame, &temp_frame, ix, iy, it, fw);
    923    // form linear equations and solve mvs
    924    solve_horn_schunck(ix, iy, it, fw, fw, fh, init_mvs, init_mv_stride,
    925                       refine_mvs, fw);
    926    // update init_mvs
    927    for (h = 0; h < fh; h++) {
    928      for (w = 0; w < fw; w++) {
    929        init_mvs[h * init_mv_stride + w].col += refine_mvs[h * fw + w].col;
    930        init_mvs[h * init_mv_stride + w].row += refine_mvs[h * fw + w].row;
    931      }
    932    }
    933  }
    934  // copy back the mvs if needed
    935  if (level != 0) {
    936    for (h = 0; h < mv_height; h++) {
    937      for (w = 0; w < mv_width; w++) {
    938        mvs[h * mv_stride + w].row =
    939            init_mvs[h / factor * init_mv_stride + w / factor].row *
    940            (double)factor;
    941        mvs[h * mv_stride + w].col =
    942            init_mvs[h / factor * init_mv_stride + w / factor].col *
    943            (double)factor;
    944      }
    945    }
    946  }
    947 free_hs_buf:
    948  if (level != 0) aom_free(init_mvs);
    949  aom_free(refine_mvs);
    950  aom_free(temp_frame.y_buffer);
    951  aom_free(ix);
    952  aom_free(iy);
    953  aom_free(it);
    954 }
    955 
    956 // Apply optical flow iteratively at each pyramid level
    957 static void pyramid_optical_flow(const YV12_BUFFER_CONFIG *from_frame,
    958                                 const YV12_BUFFER_CONFIG *to_frame,
    959                                 const int bit_depth,
    960                                 const OPFL_PARAMS *opfl_params,
    961                                 const OPTFLOW_METHOD method, LOCALMV *mvs) {
    962  assert(opfl_params->pyramid_levels > 0 &&
    963         opfl_params->pyramid_levels <= MAX_PYRAMID_LEVELS);
    964  int levels = opfl_params->pyramid_levels;
    965  const int frame_height = from_frame->y_crop_height;
    966  const int frame_width = from_frame->y_crop_width;
    967  if ((frame_height / pow(2.0, levels - 1) < 50 ||
    968       frame_height / pow(2.0, levels - 1) < 50) &&
    969      levels > 1)
    970    levels = levels - 1;
    971  uint8_t *images1[MAX_PYRAMID_LEVELS] = { NULL };
    972  uint8_t *images2[MAX_PYRAMID_LEVELS] = { NULL };
    973  int *ref_corners = NULL;
    974 
    975  images1[0] = from_frame->y_buffer;
    976  images2[0] = to_frame->y_buffer;
    977  YV12_BUFFER_CONFIG *buffers1 = aom_malloc(levels * sizeof(*buffers1));
    978  YV12_BUFFER_CONFIG *buffers2 = aom_malloc(levels * sizeof(*buffers2));
    979  if (!buffers1 || !buffers2) goto free_pyramid_buf;
    980  buffers1[0] = *from_frame;
    981  buffers2[0] = *to_frame;
    982  int fw = frame_width;
    983  int fh = frame_height;
    984  for (int i = 1; i < levels; i++) {
    985    // TODO(bohanli): may need to extend buffers for better interpolation SIMD
    986    images1[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images1[i]));
    987    images2[i] = (uint8_t *)aom_calloc(fh / 2 * fw / 2, sizeof(*images2[i]));
    988    if (!images1[i] || !images2[i]) goto free_pyramid_buf;
    989    int stride;
    990    if (i == 1)
    991      stride = from_frame->y_stride;
    992    else
    993      stride = fw;
    994    reduce(images1[i - 1], fh, fw, stride, images1[i]);
    995    reduce(images2[i - 1], fh, fw, stride, images2[i]);
    996    fh /= 2;
    997    fw /= 2;
    998    YV12_BUFFER_CONFIG a = { .y_buffer = images1[i],
    999                             .y_crop_width = fw,
   1000                             .y_crop_height = fh,
   1001                             .y_stride = fw };
   1002    YV12_BUFFER_CONFIG b = { .y_buffer = images2[i],
   1003                             .y_crop_width = fw,
   1004                             .y_crop_height = fh,
   1005                             .y_stride = fw };
   1006    buffers1[i] = a;
   1007    buffers2[i] = b;
   1008  }
   1009  // Compute corners for specific frame
   1010  int num_ref_corners = 0;
   1011  if (is_sparse(opfl_params)) {
   1012    int maxcorners = from_frame->y_crop_width * from_frame->y_crop_height;
   1013    ref_corners = aom_malloc(maxcorners * 2 * sizeof(*ref_corners));
   1014    if (!ref_corners) goto free_pyramid_buf;
   1015    num_ref_corners = detect_corners(from_frame, to_frame, maxcorners,
   1016                                     ref_corners, bit_depth);
   1017  }
   1018  const int stop_level = 0;
   1019  for (int i = levels - 1; i >= stop_level; i--) {
   1020    if (method == LUCAS_KANADE) {
   1021      assert(is_sparse(opfl_params));
   1022      lucas_kanade(&buffers1[i], &buffers2[i], i, opfl_params->lk_params,
   1023                   num_ref_corners, ref_corners, buffers1[0].y_crop_width,
   1024                   bit_depth, mvs);
   1025    } else if (method == HORN_SCHUNCK) {
   1026      assert(!is_sparse(opfl_params));
   1027      horn_schunck(&buffers1[i], &buffers2[i], i, buffers1[0].y_crop_width,
   1028                   buffers1[0].y_crop_height, buffers1[0].y_crop_width,
   1029                   opfl_params, mvs);
   1030    }
   1031  }
   1032 free_pyramid_buf:
   1033  for (int i = 1; i < levels; i++) {
   1034    aom_free(images1[i]);
   1035    aom_free(images2[i]);
   1036  }
   1037  aom_free(ref_corners);
   1038  aom_free(buffers1);
   1039  aom_free(buffers2);
   1040 }
   1041 // Computes optical flow by applying algorithm at
   1042 // multiple pyramid levels of images (lower-resolution, smoothed images)
   1043 // This accounts for larger motions.
   1044 // Inputs:
   1045 //   from_frame Frame buffer.
   1046 //   to_frame: Frame buffer. MVs point from_frame -> to_frame.
   1047 //   from_frame_idx: Index of from_frame.
   1048 //   to_frame_idx: Index of to_frame. Return all zero MVs when idx are equal.
   1049 //   bit_depth:
   1050 //   opfl_params: contains algorithm-specific parameters.
   1051 //   mv_filter: MV_FILTER_NONE, MV_FILTER_SMOOTH, or MV_FILTER_MEDIAN.
   1052 //   method: LUCAS_KANADE, HORN_SCHUNCK
   1053 //   mvs: pointer to MVs. Contains initialization, and modified
   1054 //   based on optical flow. Must have
   1055 //   dimensions = from_frame->y_crop_width * from_frame->y_crop_height
   1056 void av1_optical_flow(const YV12_BUFFER_CONFIG *from_frame,
   1057                      const YV12_BUFFER_CONFIG *to_frame,
   1058                      const int from_frame_idx, const int to_frame_idx,
   1059                      const int bit_depth, const OPFL_PARAMS *opfl_params,
   1060                      const MV_FILTER_TYPE mv_filter,
   1061                      const OPTFLOW_METHOD method, MV *mvs) {
   1062  const int frame_height = from_frame->y_crop_height;
   1063  const int frame_width = from_frame->y_crop_width;
   1064  // TODO(any): deal with the case where frames are not of the same dimensions
   1065  assert(frame_height == to_frame->y_crop_height &&
   1066         frame_width == to_frame->y_crop_width);
   1067  if (from_frame_idx == to_frame_idx) {
   1068    // immediately return all zero mvs when frame indices are equal
   1069    for (int yy = 0; yy < frame_height; yy++) {
   1070      for (int xx = 0; xx < frame_width; xx++) {
   1071        MV mv = { .row = 0, .col = 0 };
   1072        mvs[yy * frame_width + xx] = mv;
   1073      }
   1074    }
   1075    return;
   1076  }
   1077 
   1078  // Initialize double mvs based on input parameter mvs array
   1079  LOCALMV *localmvs =
   1080      aom_malloc(frame_height * frame_width * sizeof(*localmvs));
   1081  if (!localmvs) return;
   1082 
   1083  filter_mvs(MV_FILTER_SMOOTH, frame_height, frame_width, localmvs, mvs);
   1084 
   1085  for (int i = 0; i < frame_width * frame_height; i++) {
   1086    MV mv = mvs[i];
   1087    LOCALMV localmv = { .row = ((double)mv.row) / 8,
   1088                        .col = ((double)mv.col) / 8 };
   1089    localmvs[i] = localmv;
   1090  }
   1091  // Apply optical flow algorithm
   1092  pyramid_optical_flow(from_frame, to_frame, bit_depth, opfl_params, method,
   1093                       localmvs);
   1094 
   1095  // Update original mvs array
   1096  for (int j = 0; j < frame_height; j++) {
   1097    for (int i = 0; i < frame_width; i++) {
   1098      int idx = j * frame_width + i;
   1099      if (j + localmvs[idx].row < 0 || j + localmvs[idx].row >= frame_height ||
   1100          i + localmvs[idx].col < 0 || i + localmvs[idx].col >= frame_width) {
   1101        continue;
   1102      }
   1103      MV mv = { .row = (int16_t)round(8 * localmvs[idx].row),
   1104                .col = (int16_t)round(8 * localmvs[idx].col) };
   1105      mvs[idx] = mv;
   1106    }
   1107  }
   1108 
   1109  filter_mvs(mv_filter, frame_height, frame_width, localmvs, mvs);
   1110 
   1111  aom_free(localmvs);
   1112 }
   1113 #endif