tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

noise_model.c (68786B)


      1 /*
      2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <math.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <string.h>
     16 
     17 #include "aom_dsp/aom_dsp_common.h"
     18 #include "aom_dsp/mathutils.h"
     19 #include "aom_dsp/noise_model.h"
     20 #include "aom_dsp/noise_util.h"
     21 #include "aom_mem/aom_mem.h"
     22 #include "aom_ports/mem.h"
     23 #include "aom_scale/yv12config.h"
     24 
     25 #define kLowPolyNumParams 3
     26 
     27 static const int kMaxLag = 4;
     28 
     29 // Defines a function that can be used to obtain the mean of a block for the
     30 // provided data type (uint8_t, or uint16_t)
     31 #define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
     32  static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
     33                                        int stride, int x_o, int y_o,       \
     34                                        int block_size) {                   \
     35    const int max_h = AOMMIN(h - y_o, block_size);                          \
     36    const int max_w = AOMMIN(w - x_o, block_size);                          \
     37    double block_mean = 0;                                                  \
     38    for (int y = 0; y < max_h; ++y) {                                       \
     39      for (int x = 0; x < max_w; ++x) {                                     \
     40        block_mean += data[(y_o + y) * stride + x_o + x];                   \
     41      }                                                                     \
     42    }                                                                       \
     43    return block_mean / (max_w * max_h);                                    \
     44  }
     45 
     46 GET_BLOCK_MEAN(uint8_t, lowbd)
     47 GET_BLOCK_MEAN(uint16_t, highbd)
     48 
     49 static inline double get_block_mean(const uint8_t *data, int w, int h,
     50                                    int stride, int x_o, int y_o,
     51                                    int block_size, int use_highbd) {
     52  if (use_highbd)
     53    return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
     54                                 block_size);
     55  return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
     56 }
     57 
     58 // Defines a function that can be used to obtain the variance of a block
     59 // for the provided data type (uint8_t, or uint16_t)
     60 #define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
     61  static double get_noise_var_##suffix(                                  \
     62      const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
     63      int h, int x_o, int y_o, int block_size_x, int block_size_y) {     \
     64    const int max_h = AOMMIN(h - y_o, block_size_y);                     \
     65    const int max_w = AOMMIN(w - x_o, block_size_x);                     \
     66    double noise_var = 0;                                                \
     67    double noise_mean = 0;                                               \
     68    for (int y = 0; y < max_h; ++y) {                                    \
     69      for (int x = 0; x < max_w; ++x) {                                  \
     70        double noise = (double)data[(y_o + y) * stride + x_o + x] -      \
     71                       denoised[(y_o + y) * stride + x_o + x];           \
     72        noise_mean += noise;                                             \
     73        noise_var += noise * noise;                                      \
     74      }                                                                  \
     75    }                                                                    \
     76    noise_mean /= (max_w * max_h);                                       \
     77    return noise_var / (max_w * max_h) - noise_mean * noise_mean;        \
     78  }
     79 
     80 GET_NOISE_VAR(uint8_t, lowbd)
     81 GET_NOISE_VAR(uint16_t, highbd)
     82 
     83 static inline double get_noise_var(const uint8_t *data, const uint8_t *denoised,
     84                                   int w, int h, int stride, int x_o, int y_o,
     85                                   int block_size_x, int block_size_y,
     86                                   int use_highbd) {
     87  if (use_highbd)
     88    return get_noise_var_highbd((const uint16_t *)data,
     89                                (const uint16_t *)denoised, w, h, stride, x_o,
     90                                y_o, block_size_x, block_size_y);
     91  return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
     92                             block_size_x, block_size_y);
     93 }
     94 
     95 static void equation_system_clear(aom_equation_system_t *eqns) {
     96  const int n = eqns->n;
     97  memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
     98  memset(eqns->x, 0, sizeof(*eqns->x) * n);
     99  memset(eqns->b, 0, sizeof(*eqns->b) * n);
    100 }
    101 
    102 static void equation_system_copy(aom_equation_system_t *dst,
    103                                 const aom_equation_system_t *src) {
    104  const int n = dst->n;
    105  memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
    106  memcpy(dst->x, src->x, sizeof(*dst->x) * n);
    107  memcpy(dst->b, src->b, sizeof(*dst->b) * n);
    108 }
    109 
    110 static int equation_system_init(aom_equation_system_t *eqns, int n) {
    111  eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
    112  eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
    113  eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
    114  eqns->n = n;
    115  if (!eqns->A || !eqns->b || !eqns->x) {
    116    fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
    117    aom_free(eqns->A);
    118    aom_free(eqns->b);
    119    aom_free(eqns->x);
    120    memset(eqns, 0, sizeof(*eqns));
    121    return 0;
    122  }
    123  equation_system_clear(eqns);
    124  return 1;
    125 }
    126 
    127 static int equation_system_solve(aom_equation_system_t *eqns) {
    128  const int n = eqns->n;
    129  double *b = (double *)aom_malloc(sizeof(*b) * n);
    130  double *A = (double *)aom_malloc(sizeof(*A) * n * n);
    131  int ret = 0;
    132  if (A == NULL || b == NULL) {
    133    fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
    134    aom_free(b);
    135    aom_free(A);
    136    return 0;
    137  }
    138  memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
    139  memcpy(b, eqns->b, sizeof(*eqns->b) * n);
    140  ret = linsolve(n, A, eqns->n, b, eqns->x);
    141  aom_free(b);
    142  aom_free(A);
    143 
    144  if (ret == 0) {
    145    return 0;
    146  }
    147  return 1;
    148 }
    149 
    150 static void equation_system_add(aom_equation_system_t *dest,
    151                                aom_equation_system_t *src) {
    152  const int n = dest->n;
    153  int i, j;
    154  for (i = 0; i < n; ++i) {
    155    for (j = 0; j < n; ++j) {
    156      dest->A[i * n + j] += src->A[i * n + j];
    157    }
    158    dest->b[i] += src->b[i];
    159  }
    160 }
    161 
    162 static void equation_system_free(aom_equation_system_t *eqns) {
    163  if (!eqns) return;
    164  aom_free(eqns->A);
    165  aom_free(eqns->b);
    166  aom_free(eqns->x);
    167  memset(eqns, 0, sizeof(*eqns));
    168 }
    169 
    170 static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
    171  equation_system_clear(&solver->eqns);
    172  solver->num_equations = 0;
    173  solver->total = 0;
    174 }
    175 
    176 static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
    177                                      aom_noise_strength_solver_t *src) {
    178  equation_system_add(&dest->eqns, &src->eqns);
    179  dest->num_equations += src->num_equations;
    180  dest->total += src->total;
    181 }
    182 
    183 // Return the number of coefficients required for the given parameters
    184 static int num_coeffs(const aom_noise_model_params_t params) {
    185  const int n = 2 * params.lag + 1;
    186  switch (params.shape) {
    187    case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
    188    case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
    189  }
    190  return 0;
    191 }
    192 
    193 static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
    194  const int kNumBins = 20;
    195  if (!equation_system_init(&state->eqns, n)) {
    196    fprintf(stderr, "Failed initialization noise state with size %d\n", n);
    197    return 0;
    198  }
    199  state->ar_gain = 1.0;
    200  state->num_observations = 0;
    201  return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
    202                                        bit_depth);
    203 }
    204 
    205 static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
    206  const double kTolerance = 1e-6;
    207  const int last = eqns->n - 1;
    208  // Set all of the AR coefficients to zero, but try to solve for correlation
    209  // with the luma channel
    210  memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
    211  if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
    212    eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
    213  }
    214 }
    215 
    216 int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
    217  if (!lut) return 0;
    218  if (num_points <= 0) return 0;
    219  lut->num_points = 0;
    220  lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
    221  if (!lut->points) return 0;
    222  lut->num_points = num_points;
    223  memset(lut->points, 0, sizeof(*lut->points) * num_points);
    224  return 1;
    225 }
    226 
    227 void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
    228  if (!lut) return;
    229  aom_free(lut->points);
    230  memset(lut, 0, sizeof(*lut));
    231 }
    232 
    233 double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
    234                                   double x) {
    235  int i = 0;
    236  // Constant extrapolation for x <  x_0.
    237  if (x < lut->points[0][0]) return lut->points[0][1];
    238  for (i = 0; i < lut->num_points - 1; ++i) {
    239    if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
    240      const double a =
    241          (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
    242      return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
    243    }
    244  }
    245  // Constant extrapolation for x > x_{n-1}
    246  return lut->points[lut->num_points - 1][1];
    247 }
    248 
    249 static double noise_strength_solver_get_bin_index(
    250    const aom_noise_strength_solver_t *solver, double value) {
    251  const double val =
    252      fclamp(value, solver->min_intensity, solver->max_intensity);
    253  const double range = solver->max_intensity - solver->min_intensity;
    254  return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
    255 }
    256 
    257 static double noise_strength_solver_get_value(
    258    const aom_noise_strength_solver_t *solver, double x) {
    259  const double bin = noise_strength_solver_get_bin_index(solver, x);
    260  const int bin_i0 = (int)floor(bin);
    261  const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
    262  const double a = bin - bin_i0;
    263  return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
    264 }
    265 
    266 void aom_noise_strength_solver_add_measurement(
    267    aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
    268  const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
    269  const int bin_i0 = (int)floor(bin);
    270  const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
    271  const double a = bin - bin_i0;
    272  const int n = solver->num_bins;
    273  solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
    274  solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
    275  solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
    276  solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
    277  solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
    278  solver->eqns.b[bin_i1] += a * noise_std;
    279  solver->total += noise_std;
    280  solver->num_equations++;
    281 }
    282 
    283 int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
    284  // Add regularization proportional to the number of constraints
    285  const int n = solver->num_bins;
    286  const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
    287  int result = 0;
    288  double mean = 0;
    289 
    290  // Do this in a non-destructive manner so it is not confusing to the caller
    291  double *old_A = solver->eqns.A;
    292  double *A = (double *)aom_malloc(sizeof(*A) * n * n);
    293  if (!A) {
    294    fprintf(stderr, "Unable to allocate copy of A\n");
    295    return 0;
    296  }
    297  memcpy(A, old_A, sizeof(*A) * n * n);
    298 
    299  for (int i = 0; i < n; ++i) {
    300    const int i_lo = AOMMAX(0, i - 1);
    301    const int i_hi = AOMMIN(n - 1, i + 1);
    302    A[i * n + i_lo] -= kAlpha;
    303    A[i * n + i] += 2 * kAlpha;
    304    A[i * n + i_hi] -= kAlpha;
    305  }
    306 
    307  // Small regularization to give average noise strength
    308  mean = solver->total / solver->num_equations;
    309  for (int i = 0; i < n; ++i) {
    310    A[i * n + i] += 1.0 / 8192.;
    311    solver->eqns.b[i] += mean / 8192.;
    312  }
    313  solver->eqns.A = A;
    314  result = equation_system_solve(&solver->eqns);
    315  solver->eqns.A = old_A;
    316 
    317  aom_free(A);
    318  return result;
    319 }
    320 
    321 int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
    322                                   int num_bins, int bit_depth) {
    323  if (!solver) return 0;
    324  memset(solver, 0, sizeof(*solver));
    325  solver->num_bins = num_bins;
    326  solver->min_intensity = 0;
    327  solver->max_intensity = (1 << bit_depth) - 1;
    328  solver->total = 0;
    329  solver->num_equations = 0;
    330  return equation_system_init(&solver->eqns, num_bins);
    331 }
    332 
    333 void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
    334  if (!solver) return;
    335  equation_system_free(&solver->eqns);
    336 }
    337 
    338 double aom_noise_strength_solver_get_center(
    339    const aom_noise_strength_solver_t *solver, int i) {
    340  const double range = solver->max_intensity - solver->min_intensity;
    341  const int n = solver->num_bins;
    342  return ((double)i) / (n - 1) * range + solver->min_intensity;
    343 }
    344 
    345 // Computes the residual if a point were to be removed from the lut. This is
    346 // calculated as the area between the output of the solver and the line segment
    347 // that would be formed between [x_{i - 1}, x_{i + 1}).
    348 static void update_piecewise_linear_residual(
    349    const aom_noise_strength_solver_t *solver,
    350    const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
    351  const double dx = 255. / solver->num_bins;
    352  for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
    353    const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
    354                                    solver, lut->points[i - 1][0])));
    355    const int upper = AOMMIN(solver->num_bins - 1,
    356                             (int)ceil(noise_strength_solver_get_bin_index(
    357                                 solver, lut->points[i + 1][0])));
    358    double r = 0;
    359    for (int j = lower; j <= upper; ++j) {
    360      const double x = aom_noise_strength_solver_get_center(solver, j);
    361      if (x < lut->points[i - 1][0]) continue;
    362      if (x >= lut->points[i + 1][0]) continue;
    363      const double y = solver->eqns.x[j];
    364      const double a = (x - lut->points[i - 1][0]) /
    365                       (lut->points[i + 1][0] - lut->points[i - 1][0]);
    366      const double estimate_y =
    367          lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
    368      r += fabs(y - estimate_y);
    369    }
    370    residual[i] = r * dx;
    371  }
    372 }
    373 
    374 int aom_noise_strength_solver_fit_piecewise(
    375    const aom_noise_strength_solver_t *solver, int max_output_points,
    376    aom_noise_strength_lut_t *lut) {
    377  // The tolerance is normalized to be give consistent results between
    378  // different bit-depths.
    379  const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
    380  if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
    381    fprintf(stderr, "Failed to init lut\n");
    382    return 0;
    383  }
    384  for (int i = 0; i < solver->num_bins; ++i) {
    385    lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
    386    lut->points[i][1] = solver->eqns.x[i];
    387  }
    388  if (max_output_points < 0) {
    389    max_output_points = solver->num_bins;
    390  }
    391 
    392  double *residual = (double *)aom_malloc(solver->num_bins * sizeof(*residual));
    393  if (!residual) {
    394    aom_noise_strength_lut_free(lut);
    395    return 0;
    396  }
    397  memset(residual, 0, sizeof(*residual) * solver->num_bins);
    398 
    399  update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
    400 
    401  // Greedily remove points if there are too many or if it doesn't hurt local
    402  // approximation (never remove the end points)
    403  while (lut->num_points > 2) {
    404    int min_index = 1;
    405    for (int j = 1; j < lut->num_points - 1; ++j) {
    406      if (residual[j] < residual[min_index]) {
    407        min_index = j;
    408      }
    409    }
    410    const double dx =
    411        lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
    412    const double avg_residual = residual[min_index] / dx;
    413    if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
    414      break;
    415    }
    416 
    417    const int num_remaining = lut->num_points - min_index - 1;
    418    memmove(lut->points + min_index, lut->points + min_index + 1,
    419            sizeof(lut->points[0]) * num_remaining);
    420    lut->num_points--;
    421 
    422    update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
    423                                     min_index + 1);
    424  }
    425  aom_free(residual);
    426  return 1;
    427 }
    428 
    429 int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
    430                               int block_size, int bit_depth, int use_highbd) {
    431  const int n = block_size * block_size;
    432  aom_equation_system_t eqns;
    433  double *AtA_inv = 0;
    434  double *A = 0;
    435  int x = 0, y = 0, i = 0, j = 0;
    436  block_finder->A = NULL;
    437  block_finder->AtA_inv = NULL;
    438 
    439  if (!equation_system_init(&eqns, kLowPolyNumParams)) {
    440    fprintf(stderr, "Failed to init equation system for block_size=%d\n",
    441            block_size);
    442    return 0;
    443  }
    444 
    445  AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
    446                                 sizeof(*AtA_inv));
    447  A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
    448  if (AtA_inv == NULL || A == NULL) {
    449    fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
    450            block_size);
    451    aom_free(AtA_inv);
    452    aom_free(A);
    453    equation_system_free(&eqns);
    454    return 0;
    455  }
    456 
    457  block_finder->A = A;
    458  block_finder->AtA_inv = AtA_inv;
    459  block_finder->block_size = block_size;
    460  block_finder->normalization = (1 << bit_depth) - 1;
    461  block_finder->use_highbd = use_highbd;
    462 
    463  for (y = 0; y < block_size; ++y) {
    464    const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
    465    for (x = 0; x < block_size; ++x) {
    466      const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
    467      const double coords[3] = { yd, xd, 1 };
    468      const int row = y * block_size + x;
    469      A[kLowPolyNumParams * row + 0] = yd;
    470      A[kLowPolyNumParams * row + 1] = xd;
    471      A[kLowPolyNumParams * row + 2] = 1;
    472 
    473      for (i = 0; i < kLowPolyNumParams; ++i) {
    474        for (j = 0; j < kLowPolyNumParams; ++j) {
    475          eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
    476        }
    477      }
    478    }
    479  }
    480 
    481  // Lazy inverse using existing equation solver.
    482  for (i = 0; i < kLowPolyNumParams; ++i) {
    483    memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
    484    eqns.b[i] = 1;
    485    equation_system_solve(&eqns);
    486 
    487    for (j = 0; j < kLowPolyNumParams; ++j) {
    488      AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
    489    }
    490  }
    491  equation_system_free(&eqns);
    492  return 1;
    493 }
    494 
    495 void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
    496  if (!block_finder) return;
    497  aom_free(block_finder->A);
    498  aom_free(block_finder->AtA_inv);
    499  memset(block_finder, 0, sizeof(*block_finder));
    500 }
    501 
    502 void aom_flat_block_finder_extract_block(
    503    const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
    504    int w, int h, int stride, int offsx, int offsy, double *plane,
    505    double *block) {
    506  const int block_size = block_finder->block_size;
    507  const int n = block_size * block_size;
    508  const double *A = block_finder->A;
    509  const double *AtA_inv = block_finder->AtA_inv;
    510  double plane_coords[kLowPolyNumParams];
    511  double AtA_inv_b[kLowPolyNumParams];
    512  int xi, yi, i;
    513 
    514  if (block_finder->use_highbd) {
    515    const uint16_t *const data16 = (const uint16_t *const)data;
    516    for (yi = 0; yi < block_size; ++yi) {
    517      const int y = clamp(offsy + yi, 0, h - 1);
    518      for (xi = 0; xi < block_size; ++xi) {
    519        const int x = clamp(offsx + xi, 0, w - 1);
    520        block[yi * block_size + xi] =
    521            ((double)data16[y * stride + x]) / block_finder->normalization;
    522      }
    523    }
    524  } else {
    525    for (yi = 0; yi < block_size; ++yi) {
    526      const int y = clamp(offsy + yi, 0, h - 1);
    527      for (xi = 0; xi < block_size; ++xi) {
    528        const int x = clamp(offsx + xi, 0, w - 1);
    529        block[yi * block_size + xi] =
    530            ((double)data[y * stride + x]) / block_finder->normalization;
    531      }
    532    }
    533  }
    534  multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
    535  multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
    536               kLowPolyNumParams, 1);
    537  multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
    538 
    539  for (i = 0; i < n; ++i) {
    540    block[i] -= plane[i];
    541  }
    542 }
    543 
    544 typedef struct {
    545  int index;
    546  float score;
    547 } index_and_score_t;
    548 
    549 static int compare_scores(const void *a, const void *b) {
    550  const float diff =
    551      ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
    552  if (diff < 0)
    553    return -1;
    554  else if (diff > 0)
    555    return 1;
    556  return 0;
    557 }
    558 
    559 int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
    560                              const uint8_t *const data, int w, int h,
    561                              int stride, uint8_t *flat_blocks) {
    562  // The gradient-based features used in this code are based on:
    563  //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
    564  //  correlation for improved video denoising," 2012 19th, ICIP.
    565  // The thresholds are more lenient to allow for correct grain modeling
    566  // if extreme cases.
    567  const int block_size = block_finder->block_size;
    568  const int n = block_size * block_size;
    569  const double kTraceThreshold = 0.15 / (32 * 32);
    570  const double kRatioThreshold = 1.25;
    571  const double kNormThreshold = 0.08 / (32 * 32);
    572  const double kVarThreshold = 0.005 / (double)n;
    573  const int num_blocks_w = (w + block_size - 1) / block_size;
    574  const int num_blocks_h = (h + block_size - 1) / block_size;
    575  int num_flat = 0;
    576  double *plane = (double *)aom_malloc(n * sizeof(*plane));
    577  double *block = (double *)aom_malloc(n * sizeof(*block));
    578  index_and_score_t *scores = (index_and_score_t *)aom_malloc(
    579      num_blocks_w * num_blocks_h * sizeof(*scores));
    580  if (plane == NULL || block == NULL || scores == NULL) {
    581    fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
    582    aom_free(plane);
    583    aom_free(block);
    584    aom_free(scores);
    585    return -1;
    586  }
    587 
    588 #ifdef NOISE_MODEL_LOG_SCORE
    589  fprintf(stderr, "score = [");
    590 #endif
    591  for (int by = 0; by < num_blocks_h; ++by) {
    592    for (int bx = 0; bx < num_blocks_w; ++bx) {
    593      // Compute gradient covariance matrix.
    594      aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
    595                                          bx * block_size, by * block_size,
    596                                          plane, block);
    597      double Gxx = 0, Gxy = 0, Gyy = 0;
    598      double mean = 0;
    599      double var = 0;
    600 
    601      for (int yi = 1; yi < block_size - 1; ++yi) {
    602        for (int xi = 1; xi < block_size - 1; ++xi) {
    603          const double gx = (block[yi * block_size + xi + 1] -
    604                             block[yi * block_size + xi - 1]) /
    605                            2;
    606          const double gy = (block[yi * block_size + xi + block_size] -
    607                             block[yi * block_size + xi - block_size]) /
    608                            2;
    609          Gxx += gx * gx;
    610          Gxy += gx * gy;
    611          Gyy += gy * gy;
    612 
    613          const double value = block[yi * block_size + xi];
    614          mean += value;
    615          var += value * value;
    616        }
    617      }
    618      mean /= (block_size - 2) * (block_size - 2);
    619 
    620      // Normalize gradients by block_size.
    621      Gxx /= ((block_size - 2) * (block_size - 2));
    622      Gxy /= ((block_size - 2) * (block_size - 2));
    623      Gyy /= ((block_size - 2) * (block_size - 2));
    624      var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
    625 
    626      {
    627        const double trace = Gxx + Gyy;
    628        const double det = Gxx * Gyy - Gxy * Gxy;
    629        const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
    630        const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
    631        const double norm = e1;  // Spectral norm
    632        const double ratio = (e1 / AOMMAX(e2, 1e-6));
    633        const int is_flat = (trace < kTraceThreshold) &&
    634                            (ratio < kRatioThreshold) &&
    635                            (norm < kNormThreshold) && (var > kVarThreshold);
    636        // The following weights are used to combine the above features to give
    637        // a sigmoid score for flatness. If the input was normalized to [0,100]
    638        // the magnitude of these values would be close to 1 (e.g., weights
    639        // corresponding to variance would be a factor of 10000x smaller).
    640        // The weights are given in the following order:
    641        //    [{var}, {ratio}, {trace}, {norm}, offset]
    642        // with one of the most discriminative being simply the variance.
    643        const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
    644        double sum_weights = weights[0] * var + weights[1] * ratio +
    645                             weights[2] * trace + weights[3] * norm +
    646                             weights[4];
    647        // clamp the value to [-25.0, 100.0] to prevent overflow
    648        sum_weights = fclamp(sum_weights, -25.0, 100.0);
    649        const float score = (float)(1.0 / (1 + exp(-sum_weights)));
    650        flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
    651        scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
    652        scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
    653 #ifdef NOISE_MODEL_LOG_SCORE
    654        fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
    655                is_flat);
    656 #endif
    657        num_flat += is_flat;
    658      }
    659    }
    660 #ifdef NOISE_MODEL_LOG_SCORE
    661    fprintf(stderr, "\n");
    662 #endif
    663  }
    664 #ifdef NOISE_MODEL_LOG_SCORE
    665  fprintf(stderr, "];\n");
    666 #endif
    667  // Find the top-scored blocks (most likely to be flat) and set the flat blocks
    668  // be the union of the thresholded results and the top 10th percentile of the
    669  // scored results.
    670  qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
    671  const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
    672  const float score_threshold = scores[top_nth_percentile].score;
    673  for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
    674    if (scores[i].score >= score_threshold) {
    675      num_flat += flat_blocks[scores[i].index] == 0;
    676      flat_blocks[scores[i].index] |= 1;
    677    }
    678  }
    679  aom_free(block);
    680  aom_free(plane);
    681  aom_free(scores);
    682  return num_flat;
    683 }
    684 
    685 int aom_noise_model_init(aom_noise_model_t *model,
    686                         const aom_noise_model_params_t params) {
    687  const int n = num_coeffs(params);
    688  const int lag = params.lag;
    689  const int bit_depth = params.bit_depth;
    690  int x = 0, y = 0, i = 0, c = 0;
    691 
    692  memset(model, 0, sizeof(*model));
    693  if (params.lag < 1) {
    694    fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
    695    return 0;
    696  }
    697  if (params.lag > kMaxLag) {
    698    fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
    699            kMaxLag);
    700    return 0;
    701  }
    702  if (!(params.bit_depth == 8 || params.bit_depth == 10 ||
    703        params.bit_depth == 12)) {
    704    return 0;
    705  }
    706 
    707  model->params = params;
    708  for (c = 0; c < 3; ++c) {
    709    if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
    710      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
    711      aom_noise_model_free(model);
    712      return 0;
    713    }
    714    if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
    715      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
    716      aom_noise_model_free(model);
    717      return 0;
    718    }
    719  }
    720  model->n = n;
    721  model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
    722  if (!model->coords) {
    723    aom_noise_model_free(model);
    724    return 0;
    725  }
    726 
    727  for (y = -lag; y <= 0; ++y) {
    728    const int max_x = y == 0 ? -1 : lag;
    729    for (x = -lag; x <= max_x; ++x) {
    730      switch (params.shape) {
    731        case AOM_NOISE_SHAPE_DIAMOND:
    732          if (abs(x) <= y + lag) {
    733            model->coords[i][0] = x;
    734            model->coords[i][1] = y;
    735            ++i;
    736          }
    737          break;
    738        case AOM_NOISE_SHAPE_SQUARE:
    739          model->coords[i][0] = x;
    740          model->coords[i][1] = y;
    741          ++i;
    742          break;
    743        default:
    744          fprintf(stderr, "Invalid shape\n");
    745          aom_noise_model_free(model);
    746          return 0;
    747      }
    748    }
    749  }
    750  assert(i == n);
    751  return 1;
    752 }
    753 
    754 void aom_noise_model_free(aom_noise_model_t *model) {
    755  int c = 0;
    756  if (!model) return;
    757 
    758  aom_free(model->coords);
    759  for (c = 0; c < 3; ++c) {
    760    equation_system_free(&model->latest_state[c].eqns);
    761    equation_system_free(&model->combined_state[c].eqns);
    762 
    763    equation_system_free(&model->latest_state[c].strength_solver.eqns);
    764    equation_system_free(&model->combined_state[c].strength_solver.eqns);
    765  }
    766  memset(model, 0, sizeof(*model));
    767 }
    768 
    769 // Extracts the neighborhood defined by coords around point (x, y) from
    770 // the difference between the data and denoised images. Also extracts the
    771 // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
    772 #define EXTRACT_AR_ROW(INT_TYPE, suffix)                                   \
    773  static double extract_ar_row_##suffix(                                   \
    774      int(*coords)[2], int num_coords, const INT_TYPE *const data,         \
    775      const INT_TYPE *const denoised, int stride, int sub_log2[2],         \
    776      const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,  \
    777      int alt_stride, int x, int y, double *buffer) {                      \
    778    for (int i = 0; i < num_coords; ++i) {                                 \
    779      const int x_i = x + coords[i][0], y_i = y + coords[i][1];            \
    780      buffer[i] =                                                          \
    781          (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
    782    }                                                                      \
    783    const double val =                                                     \
    784        (double)data[y * stride + x] - denoised[y * stride + x];           \
    785                                                                           \
    786    if (alt_data && alt_denoised) {                                        \
    787      double avg_data = 0, avg_denoised = 0;                               \
    788      int num_samples = 0;                                                 \
    789      for (int dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) {              \
    790        const int y_up = (y << sub_log2[1]) + dy_i;                        \
    791        for (int dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) {            \
    792          const int x_up = (x << sub_log2[0]) + dx_i;                      \
    793          avg_data += alt_data[y_up * alt_stride + x_up];                  \
    794          avg_denoised += alt_denoised[y_up * alt_stride + x_up];          \
    795          num_samples++;                                                   \
    796        }                                                                  \
    797      }                                                                    \
    798      buffer[num_coords] = (avg_data - avg_denoised) / num_samples;        \
    799    }                                                                      \
    800    return val;                                                            \
    801  }
    802 
    803 EXTRACT_AR_ROW(uint8_t, lowbd)
    804 EXTRACT_AR_ROW(uint16_t, highbd)
    805 
    806 static int add_block_observations(
    807    aom_noise_model_t *noise_model, int c, const uint8_t *const data,
    808    const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
    809    const uint8_t *const alt_data, const uint8_t *const alt_denoised,
    810    int alt_stride, const uint8_t *const flat_blocks, int block_size,
    811    int num_blocks_w, int num_blocks_h) {
    812  const int lag = noise_model->params.lag;
    813  const int num_coords = noise_model->n;
    814  const double normalization = (1 << noise_model->params.bit_depth) - 1;
    815  double *A = noise_model->latest_state[c].eqns.A;
    816  double *b = noise_model->latest_state[c].eqns.b;
    817  double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
    818  const int n = noise_model->latest_state[c].eqns.n;
    819 
    820  if (!buffer) {
    821    fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
    822    return 0;
    823  }
    824  for (int by = 0; by < num_blocks_h; ++by) {
    825    const int y_o = by * (block_size >> sub_log2[1]);
    826    for (int bx = 0; bx < num_blocks_w; ++bx) {
    827      const int x_o = bx * (block_size >> sub_log2[0]);
    828      if (!flat_blocks[by * num_blocks_w + bx]) {
    829        continue;
    830      }
    831      int y_start =
    832          (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
    833      int x_start =
    834          (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
    835      int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
    836                         block_size >> sub_log2[1]);
    837      int x_end = AOMMIN(
    838          (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
    839          (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
    840              ? (block_size >> sub_log2[0])
    841              : ((block_size >> sub_log2[0]) - lag));
    842      for (int y = y_start; y < y_end; ++y) {
    843        for (int x = x_start; x < x_end; ++x) {
    844          const double val =
    845              noise_model->params.use_highbd
    846                  ? extract_ar_row_highbd(noise_model->coords, num_coords,
    847                                          (const uint16_t *const)data,
    848                                          (const uint16_t *const)denoised,
    849                                          stride, sub_log2,
    850                                          (const uint16_t *const)alt_data,
    851                                          (const uint16_t *const)alt_denoised,
    852                                          alt_stride, x + x_o, y + y_o, buffer)
    853                  : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
    854                                         denoised, stride, sub_log2, alt_data,
    855                                         alt_denoised, alt_stride, x + x_o,
    856                                         y + y_o, buffer);
    857          for (int i = 0; i < n; ++i) {
    858            for (int j = 0; j < n; ++j) {
    859              A[i * n + j] +=
    860                  (buffer[i] * buffer[j]) / (normalization * normalization);
    861            }
    862            b[i] += (buffer[i] * val) / (normalization * normalization);
    863          }
    864          noise_model->latest_state[c].num_observations++;
    865        }
    866      }
    867    }
    868  }
    869  aom_free(buffer);
    870  return 1;
    871 }
    872 
    873 static void add_noise_std_observations(
    874    aom_noise_model_t *noise_model, int c, const double *coeffs,
    875    const uint8_t *const data, const uint8_t *const denoised, int w, int h,
    876    int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
    877    const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
    878    int num_blocks_h) {
    879  const int num_coords = noise_model->n;
    880  aom_noise_strength_solver_t *noise_strength_solver =
    881      &noise_model->latest_state[c].strength_solver;
    882 
    883  const aom_noise_strength_solver_t *noise_strength_luma =
    884      &noise_model->latest_state[0].strength_solver;
    885  const double luma_gain = noise_model->latest_state[0].ar_gain;
    886  const double noise_gain = noise_model->latest_state[c].ar_gain;
    887  for (int by = 0; by < num_blocks_h; ++by) {
    888    const int y_o = by * (block_size >> sub_log2[1]);
    889    for (int bx = 0; bx < num_blocks_w; ++bx) {
    890      const int x_o = bx * (block_size >> sub_log2[0]);
    891      if (!flat_blocks[by * num_blocks_w + bx]) {
    892        continue;
    893      }
    894      const int num_samples_h =
    895          AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
    896                 block_size >> sub_log2[1]);
    897      const int num_samples_w =
    898          AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
    899                 (block_size >> sub_log2[0]));
    900      // Make sure that we have a reasonable amount of samples to consider the
    901      // block
    902      if (num_samples_w * num_samples_h > block_size) {
    903        const double block_mean = get_block_mean(
    904            alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
    905            x_o << sub_log2[0], y_o << sub_log2[1], block_size,
    906            noise_model->params.use_highbd);
    907        const double noise_var = get_noise_var(
    908            data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
    909            y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
    910            noise_model->params.use_highbd);
    911        // We want to remove the part of the noise that came from being
    912        // correlated with luma. Note that the noise solver for luma must
    913        // have already been run.
    914        const double luma_strength =
    915            c > 0 ? luma_gain * noise_strength_solver_get_value(
    916                                    noise_strength_luma, block_mean)
    917                  : 0;
    918        const double corr = c > 0 ? coeffs[num_coords] : 0;
    919        // Chroma noise:
    920        //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
    921        // The uncorrelated component:
    922        //   uncorr_var = noise_var - (corr * luma_strength)^2
    923        // But don't allow fully correlated noise (hence the max), since the
    924        // synthesis cannot model it.
    925        const double uncorr_std = sqrt(
    926            AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
    927        // After we've removed correlation with luma, undo the gain that will
    928        // come from running the IIR filter.
    929        const double adjusted_strength = uncorr_std / noise_gain;
    930        aom_noise_strength_solver_add_measurement(
    931            noise_strength_solver, block_mean, adjusted_strength);
    932      }
    933    }
    934  }
    935 }
    936 
    937 // Return true if the noise estimate appears to be different from the combined
    938 // (multi-frame) estimate. The difference is measured by checking whether the
    939 // AR coefficients have diverged (using a threshold on normalized cross
    940 // correlation), or whether the noise strength has changed.
    941 static int is_noise_model_different(aom_noise_model_t *const noise_model) {
    942  // These thresholds are kind of arbitrary and will likely need further tuning
    943  // (or exported as parameters). The threshold on noise strength is a weighted
    944  // difference between the noise strength histograms
    945  const double kCoeffThreshold = 0.9;
    946  const double kStrengthThreshold =
    947      0.005 * (1 << (noise_model->params.bit_depth - 8));
    948  for (int c = 0; c < 1; ++c) {
    949    const double corr =
    950        aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
    951                                         noise_model->combined_state[c].eqns.x,
    952                                         noise_model->combined_state[c].eqns.n);
    953    if (corr < kCoeffThreshold) return 1;
    954 
    955    const double dx =
    956        1.0 / noise_model->latest_state[c].strength_solver.num_bins;
    957 
    958    const aom_equation_system_t *latest_eqns =
    959        &noise_model->latest_state[c].strength_solver.eqns;
    960    const aom_equation_system_t *combined_eqns =
    961        &noise_model->combined_state[c].strength_solver.eqns;
    962    double diff = 0;
    963    double total_weight = 0;
    964    for (int j = 0; j < latest_eqns->n; ++j) {
    965      double weight = 0;
    966      for (int i = 0; i < latest_eqns->n; ++i) {
    967        weight += latest_eqns->A[i * latest_eqns->n + j];
    968      }
    969      weight = sqrt(weight);
    970      diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
    971      total_weight += weight;
    972    }
    973    if (diff * dx / total_weight > kStrengthThreshold) return 1;
    974  }
    975  return 0;
    976 }
    977 
    978 static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
    979  const int ret = equation_system_solve(&state->eqns);
    980  state->ar_gain = 1.0;
    981  if (!ret) return ret;
    982 
    983  // Update the AR gain from the equation system as it will be used to fit
    984  // the noise strength as a function of intensity.  In the Yule-Walker
    985  // equations, the diagonal should be the variance of the correlated noise.
    986  // In the case of the least squares estimate, there will be some variability
    987  // in the diagonal. So use the mean of the diagonal as the estimate of
    988  // overall variance (this works for least squares or Yule-Walker formulation).
    989  double var = 0;
    990  const int n = state->eqns.n;
    991  for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
    992    var += state->eqns.A[i * n + i] / state->num_observations;
    993  }
    994  var /= (n - is_chroma);
    995 
    996  // Keep track of E(Y^2) = <b, x> + E(X^2)
    997  // In the case that we are using chroma and have an estimate of correlation
    998  // with luma we adjust that estimate slightly to remove the correlated bits by
    999  // subtracting out the last column of a scaled by our correlation estimate
   1000  // from b. E(y^2) = <b - A(:, end)*x(end), x>
   1001  double sum_covar = 0;
   1002  for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
   1003    double bi = state->eqns.b[i];
   1004    if (is_chroma) {
   1005      bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
   1006    }
   1007    sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
   1008  }
   1009  // Now, get an estimate of the variance of uncorrelated noise signal and use
   1010  // it to determine the gain of the AR filter.
   1011  const double noise_var = AOMMAX(var - sum_covar, 1e-6);
   1012  state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
   1013  return ret;
   1014 }
   1015 
   1016 aom_noise_status_t aom_noise_model_update(
   1017    aom_noise_model_t *const noise_model, const uint8_t *const data[3],
   1018    const uint8_t *const denoised[3], int w, int h, int stride[3],
   1019    int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
   1020  const int num_blocks_w = (w + block_size - 1) / block_size;
   1021  const int num_blocks_h = (h + block_size - 1) / block_size;
   1022  int y_model_different = 0;
   1023  int num_blocks = 0;
   1024  int i = 0, channel = 0;
   1025 
   1026  if (block_size <= 1) {
   1027    fprintf(stderr, "block_size = %d must be > 1\n", block_size);
   1028    return AOM_NOISE_STATUS_INVALID_ARGUMENT;
   1029  }
   1030 
   1031  if (block_size < noise_model->params.lag * 2 + 1) {
   1032    fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
   1033            noise_model->params.lag * 2 + 1);
   1034    return AOM_NOISE_STATUS_INVALID_ARGUMENT;
   1035  }
   1036 
   1037  // Clear the latest equation system
   1038  for (i = 0; i < 3; ++i) {
   1039    equation_system_clear(&noise_model->latest_state[i].eqns);
   1040    noise_model->latest_state[i].num_observations = 0;
   1041    noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
   1042  }
   1043 
   1044  // Check that we have enough flat blocks
   1045  for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
   1046    if (flat_blocks[i]) {
   1047      num_blocks++;
   1048    }
   1049  }
   1050 
   1051  if (num_blocks <= 1) {
   1052    fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
   1053    return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
   1054  }
   1055 
   1056  for (channel = 0; channel < 3; ++channel) {
   1057    int no_subsampling[2] = { 0, 0 };
   1058    const uint8_t *alt_data = channel > 0 ? data[0] : 0;
   1059    const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
   1060    int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
   1061    const int is_chroma = channel != 0;
   1062    if (!data[channel] || !denoised[channel]) break;
   1063    if (!add_block_observations(noise_model, channel, data[channel],
   1064                                denoised[channel], w, h, stride[channel], sub,
   1065                                alt_data, alt_denoised, stride[0], flat_blocks,
   1066                                block_size, num_blocks_w, num_blocks_h)) {
   1067      fprintf(stderr, "Adding block observation failed\n");
   1068      return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1069    }
   1070 
   1071    if (!ar_equation_system_solve(&noise_model->latest_state[channel],
   1072                                  is_chroma)) {
   1073      if (is_chroma) {
   1074        set_chroma_coefficient_fallback_soln(
   1075            &noise_model->latest_state[channel].eqns);
   1076      } else {
   1077        fprintf(stderr, "Solving latest noise equation system failed %d!\n",
   1078                channel);
   1079        return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1080      }
   1081    }
   1082 
   1083    add_noise_std_observations(
   1084        noise_model, channel, noise_model->latest_state[channel].eqns.x,
   1085        data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
   1086        stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
   1087 
   1088    if (!aom_noise_strength_solver_solve(
   1089            &noise_model->latest_state[channel].strength_solver)) {
   1090      fprintf(stderr, "Solving latest noise strength failed!\n");
   1091      return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1092    }
   1093 
   1094    // Check noise characteristics and return if error.
   1095    if (channel == 0 &&
   1096        noise_model->combined_state[channel].strength_solver.num_equations >
   1097            0 &&
   1098        is_noise_model_different(noise_model)) {
   1099      y_model_different = 1;
   1100    }
   1101 
   1102    // Don't update the combined stats if the y model is different.
   1103    if (y_model_different) continue;
   1104 
   1105    noise_model->combined_state[channel].num_observations +=
   1106        noise_model->latest_state[channel].num_observations;
   1107    equation_system_add(&noise_model->combined_state[channel].eqns,
   1108                        &noise_model->latest_state[channel].eqns);
   1109    if (!ar_equation_system_solve(&noise_model->combined_state[channel],
   1110                                  is_chroma)) {
   1111      if (is_chroma) {
   1112        set_chroma_coefficient_fallback_soln(
   1113            &noise_model->combined_state[channel].eqns);
   1114      } else {
   1115        fprintf(stderr, "Solving combined noise equation system failed %d!\n",
   1116                channel);
   1117        return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1118      }
   1119    }
   1120 
   1121    noise_strength_solver_add(
   1122        &noise_model->combined_state[channel].strength_solver,
   1123        &noise_model->latest_state[channel].strength_solver);
   1124 
   1125    if (!aom_noise_strength_solver_solve(
   1126            &noise_model->combined_state[channel].strength_solver)) {
   1127      fprintf(stderr, "Solving combined noise strength failed!\n");
   1128      return AOM_NOISE_STATUS_INTERNAL_ERROR;
   1129    }
   1130  }
   1131 
   1132  return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
   1133                           : AOM_NOISE_STATUS_OK;
   1134 }
   1135 
   1136 void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
   1137  for (int c = 0; c < 3; c++) {
   1138    equation_system_copy(&noise_model->combined_state[c].eqns,
   1139                         &noise_model->latest_state[c].eqns);
   1140    equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
   1141                         &noise_model->latest_state[c].strength_solver.eqns);
   1142    noise_model->combined_state[c].strength_solver.num_equations =
   1143        noise_model->latest_state[c].strength_solver.num_equations;
   1144    noise_model->combined_state[c].num_observations =
   1145        noise_model->latest_state[c].num_observations;
   1146    noise_model->combined_state[c].ar_gain =
   1147        noise_model->latest_state[c].ar_gain;
   1148  }
   1149 }
   1150 
   1151 int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
   1152                                         aom_film_grain_t *film_grain) {
   1153  if (noise_model->params.lag > 3) {
   1154    fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
   1155    return 0;
   1156  }
   1157  uint16_t random_seed = film_grain->random_seed;
   1158  memset(film_grain, 0, sizeof(*film_grain));
   1159  film_grain->random_seed = random_seed;
   1160 
   1161  film_grain->apply_grain = 1;
   1162  film_grain->update_parameters = 1;
   1163 
   1164  film_grain->ar_coeff_lag = noise_model->params.lag;
   1165 
   1166  // Convert the scaling functions to 8 bit values
   1167  aom_noise_strength_lut_t scaling_points[3];
   1168  if (!aom_noise_strength_solver_fit_piecewise(
   1169          &noise_model->combined_state[0].strength_solver, 14,
   1170          scaling_points + 0)) {
   1171    return 0;
   1172  }
   1173  if (!aom_noise_strength_solver_fit_piecewise(
   1174          &noise_model->combined_state[1].strength_solver, 10,
   1175          scaling_points + 1)) {
   1176    aom_noise_strength_lut_free(scaling_points + 0);
   1177    return 0;
   1178  }
   1179  if (!aom_noise_strength_solver_fit_piecewise(
   1180          &noise_model->combined_state[2].strength_solver, 10,
   1181          scaling_points + 2)) {
   1182    aom_noise_strength_lut_free(scaling_points + 0);
   1183    aom_noise_strength_lut_free(scaling_points + 1);
   1184    return 0;
   1185  }
   1186 
   1187  // Both the domain and the range of the scaling functions in the film_grain
   1188  // are normalized to 8-bit (e.g., they are implicitly scaled during grain
   1189  // synthesis).
   1190  const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
   1191  double max_scaling_value = 1e-4;
   1192  for (int c = 0; c < 3; ++c) {
   1193    for (int i = 0; i < scaling_points[c].num_points; ++i) {
   1194      scaling_points[c].points[i][0] =
   1195          AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
   1196      scaling_points[c].points[i][1] =
   1197          AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
   1198      max_scaling_value =
   1199          AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
   1200    }
   1201  }
   1202 
   1203  // Scaling_shift values are in the range [8,11]
   1204  const int max_scaling_value_log2 =
   1205      clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
   1206  film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
   1207 
   1208  const double scale_factor = 1 << (8 - max_scaling_value_log2);
   1209  film_grain->num_y_points = scaling_points[0].num_points;
   1210  film_grain->num_cb_points = scaling_points[1].num_points;
   1211  film_grain->num_cr_points = scaling_points[2].num_points;
   1212 
   1213  int(*film_grain_scaling[3])[2] = {
   1214    film_grain->scaling_points_y,
   1215    film_grain->scaling_points_cb,
   1216    film_grain->scaling_points_cr,
   1217  };
   1218  for (int c = 0; c < 3; c++) {
   1219    for (int i = 0; i < scaling_points[c].num_points; ++i) {
   1220      film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
   1221      film_grain_scaling[c][i][1] = clamp(
   1222          (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
   1223    }
   1224  }
   1225  aom_noise_strength_lut_free(scaling_points + 0);
   1226  aom_noise_strength_lut_free(scaling_points + 1);
   1227  aom_noise_strength_lut_free(scaling_points + 2);
   1228 
   1229  // Convert the ar_coeffs into 8-bit values
   1230  const int n_coeff = noise_model->combined_state[0].eqns.n;
   1231  double max_coeff = 1e-4, min_coeff = -1e-4;
   1232  double y_corr[2] = { 0, 0 };
   1233  double avg_luma_strength = 0;
   1234  for (int c = 0; c < 3; c++) {
   1235    aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
   1236    for (int i = 0; i < n_coeff; ++i) {
   1237      max_coeff = AOMMAX(max_coeff, eqns->x[i]);
   1238      min_coeff = AOMMIN(min_coeff, eqns->x[i]);
   1239    }
   1240    // Since the correlation between luma/chroma was computed in an already
   1241    // scaled space, we adjust it in the un-scaled space.
   1242    aom_noise_strength_solver_t *solver =
   1243        &noise_model->combined_state[c].strength_solver;
   1244    // Compute a weighted average of the strength for the channel.
   1245    double average_strength = 0, total_weight = 0;
   1246    for (int i = 0; i < solver->eqns.n; ++i) {
   1247      double w = 0;
   1248      for (int j = 0; j < solver->eqns.n; ++j) {
   1249        w += solver->eqns.A[i * solver->eqns.n + j];
   1250      }
   1251      w = sqrt(w);
   1252      average_strength += solver->eqns.x[i] * w;
   1253      total_weight += w;
   1254    }
   1255    if (total_weight == 0)
   1256      average_strength = 1;
   1257    else
   1258      average_strength /= total_weight;
   1259    if (c == 0) {
   1260      avg_luma_strength = average_strength;
   1261    } else {
   1262      y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
   1263      max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
   1264      min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
   1265    }
   1266  }
   1267  // Shift value: AR coeffs range (values 6-9)
   1268  // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
   1269  film_grain->ar_coeff_shift =
   1270      clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
   1271            6, 9);
   1272  double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
   1273  int *ar_coeffs[3] = {
   1274    film_grain->ar_coeffs_y,
   1275    film_grain->ar_coeffs_cb,
   1276    film_grain->ar_coeffs_cr,
   1277  };
   1278  for (int c = 0; c < 3; ++c) {
   1279    aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
   1280    for (int i = 0; i < n_coeff; ++i) {
   1281      ar_coeffs[c][i] =
   1282          clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
   1283    }
   1284    if (c > 0) {
   1285      ar_coeffs[c][n_coeff] =
   1286          clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
   1287    }
   1288  }
   1289 
   1290  // At the moment, the noise modeling code assumes that the chroma scaling
   1291  // functions are a function of luma.
   1292  film_grain->cb_mult = 128;       // 8 bits
   1293  film_grain->cb_luma_mult = 192;  // 8 bits
   1294  film_grain->cb_offset = 256;     // 9 bits
   1295 
   1296  film_grain->cr_mult = 128;       // 8 bits
   1297  film_grain->cr_luma_mult = 192;  // 8 bits
   1298  film_grain->cr_offset = 256;     // 9 bits
   1299 
   1300  film_grain->chroma_scaling_from_luma = 0;
   1301  film_grain->grain_scale_shift = 0;
   1302  film_grain->overlap_flag = 1;
   1303  return 1;
   1304 }
   1305 
   1306 static void pointwise_multiply(const float *a, float *b, int n) {
   1307  for (int i = 0; i < n; ++i) {
   1308    b[i] *= a[i];
   1309  }
   1310 }
   1311 
   1312 static float *get_half_cos_window(int block_size) {
   1313  float *window_function =
   1314      (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
   1315  if (!window_function) return NULL;
   1316  for (int y = 0; y < block_size; ++y) {
   1317    const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
   1318    for (int x = 0; x < block_size; ++x) {
   1319      const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
   1320      window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
   1321    }
   1322  }
   1323  return window_function;
   1324 }
   1325 
   1326 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                               \
   1327  static void dither_and_quantize_##suffix(                                 \
   1328      float *result, int result_stride, INT_TYPE *denoised, int w, int h,   \
   1329      int stride, int chroma_sub_w, int chroma_sub_h, int block_size,       \
   1330      float block_normalization) {                                          \
   1331    for (int y = 0; y < (h >> chroma_sub_h); ++y) {                         \
   1332      for (int x = 0; x < (w >> chroma_sub_w); ++x) {                       \
   1333        const int result_idx =                                              \
   1334            (y + (block_size >> chroma_sub_h)) * result_stride + x +        \
   1335            (block_size >> chroma_sub_w);                                   \
   1336        INT_TYPE new_val = (INT_TYPE)AOMMIN(                                \
   1337            AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),     \
   1338            block_normalization);                                           \
   1339        const float err =                                                   \
   1340            -(((float)new_val) / block_normalization - result[result_idx]); \
   1341        denoised[y * stride + x] = new_val;                                 \
   1342        if (x + 1 < (w >> chroma_sub_w)) {                                  \
   1343          result[result_idx + 1] += err * 7.0f / 16.0f;                     \
   1344        }                                                                   \
   1345        if (y + 1 < (h >> chroma_sub_h)) {                                  \
   1346          if (x > 0) {                                                      \
   1347            result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;   \
   1348          }                                                                 \
   1349          result[result_idx + result_stride] += err * 5.0f / 16.0f;         \
   1350          if (x + 1 < (w >> chroma_sub_w)) {                                \
   1351            result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;   \
   1352          }                                                                 \
   1353        }                                                                   \
   1354      }                                                                     \
   1355    }                                                                       \
   1356  }
   1357 
   1358 DITHER_AND_QUANTIZE(uint8_t, lowbd)
   1359 DITHER_AND_QUANTIZE(uint16_t, highbd)
   1360 
   1361 int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
   1362                          int w, int h, int stride[3], int chroma_sub[2],
   1363                          float *noise_psd[3], int block_size, int bit_depth,
   1364                          int use_highbd) {
   1365  float *plane = NULL, *block = NULL, *window_full = NULL,
   1366        *window_chroma = NULL;
   1367  double *block_d = NULL, *plane_d = NULL;
   1368  struct aom_noise_tx_t *tx_full = NULL;
   1369  struct aom_noise_tx_t *tx_chroma = NULL;
   1370  const int num_blocks_w = (w + block_size - 1) / block_size;
   1371  const int num_blocks_h = (h + block_size - 1) / block_size;
   1372  const int result_stride = (num_blocks_w + 2) * block_size;
   1373  const int result_height = (num_blocks_h + 2) * block_size;
   1374  float *result = NULL;
   1375  int init_success = 1;
   1376  aom_flat_block_finder_t block_finder_full;
   1377  aom_flat_block_finder_t block_finder_chroma;
   1378  const float kBlockNormalization = (float)((1 << bit_depth) - 1);
   1379  if (chroma_sub[0] != chroma_sub[1]) {
   1380    fprintf(stderr,
   1381            "aom_wiener_denoise_2d doesn't handle different chroma "
   1382            "subsampling\n");
   1383    return 0;
   1384  }
   1385  init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
   1386                                             bit_depth, use_highbd);
   1387  result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
   1388                               sizeof(*result));
   1389  plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
   1390  block =
   1391      (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
   1392  block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
   1393  plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
   1394  window_full = get_half_cos_window(block_size);
   1395  tx_full = aom_noise_tx_malloc(block_size);
   1396 
   1397  if (chroma_sub[0] != 0) {
   1398    init_success &= aom_flat_block_finder_init(&block_finder_chroma,
   1399                                               block_size >> chroma_sub[0],
   1400                                               bit_depth, use_highbd);
   1401    window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
   1402    tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
   1403  } else {
   1404    window_chroma = window_full;
   1405    tx_chroma = tx_full;
   1406  }
   1407 
   1408  init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
   1409                  (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
   1410                  (window_full != NULL) && (window_chroma != NULL) &&
   1411                  (result != NULL);
   1412  for (int c = init_success ? 0 : 3; c < 3; ++c) {
   1413    float *window_function = c == 0 ? window_full : window_chroma;
   1414    aom_flat_block_finder_t *block_finder = &block_finder_full;
   1415    const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
   1416    const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
   1417    struct aom_noise_tx_t *tx =
   1418        (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
   1419    if (!data[c] || !denoised[c]) continue;
   1420    if (c > 0 && chroma_sub[0] != 0) {
   1421      block_finder = &block_finder_chroma;
   1422    }
   1423    memset(result, 0, sizeof(*result) * result_stride * result_height);
   1424    // Do overlapped block processing (half overlapped). The block rows can
   1425    // easily be done in parallel
   1426    for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
   1427         offsy += (block_size >> chroma_sub_h) / 2) {
   1428      for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
   1429           offsx += (block_size >> chroma_sub_w) / 2) {
   1430        // Pad the boundary when processing each block-set.
   1431        for (int by = -1; by < num_blocks_h; ++by) {
   1432          for (int bx = -1; bx < num_blocks_w; ++bx) {
   1433            const int pixels_per_block =
   1434                (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
   1435            aom_flat_block_finder_extract_block(
   1436                block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
   1437                stride[c], bx * (block_size >> chroma_sub_w) + offsx,
   1438                by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
   1439            for (int j = 0; j < pixels_per_block; ++j) {
   1440              block[j] = (float)block_d[j];
   1441              plane[j] = (float)plane_d[j];
   1442            }
   1443            pointwise_multiply(window_function, block, pixels_per_block);
   1444            aom_noise_tx_forward(tx, block);
   1445            aom_noise_tx_filter(tx, noise_psd[c]);
   1446            aom_noise_tx_inverse(tx, block);
   1447 
   1448            // Apply window function to the plane approximation (we will apply
   1449            // it to the sum of plane + block when composing the results).
   1450            pointwise_multiply(window_function, plane, pixels_per_block);
   1451 
   1452            for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
   1453              const int y_result =
   1454                  y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
   1455              for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
   1456                const int x_result =
   1457                    x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
   1458                result[y_result * result_stride + x_result] +=
   1459                    (block[y * (block_size >> chroma_sub_w) + x] +
   1460                     plane[y * (block_size >> chroma_sub_w) + x]) *
   1461                    window_function[y * (block_size >> chroma_sub_w) + x];
   1462              }
   1463            }
   1464          }
   1465        }
   1466      }
   1467    }
   1468    if (use_highbd) {
   1469      dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
   1470                                 w, h, stride[c], chroma_sub_w, chroma_sub_h,
   1471                                 block_size, kBlockNormalization);
   1472    } else {
   1473      dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
   1474                                stride[c], chroma_sub_w, chroma_sub_h,
   1475                                block_size, kBlockNormalization);
   1476    }
   1477  }
   1478  aom_free(result);
   1479  aom_free(plane);
   1480  aom_free(block);
   1481  aom_free(plane_d);
   1482  aom_free(block_d);
   1483  aom_free(window_full);
   1484 
   1485  aom_noise_tx_free(tx_full);
   1486 
   1487  aom_flat_block_finder_free(&block_finder_full);
   1488  if (chroma_sub[0] != 0) {
   1489    aom_flat_block_finder_free(&block_finder_chroma);
   1490    aom_free(window_chroma);
   1491    aom_noise_tx_free(tx_chroma);
   1492  }
   1493  return init_success;
   1494 }
   1495 
   1496 struct aom_denoise_and_model_t {
   1497  int block_size;
   1498  int bit_depth;
   1499  float noise_level;
   1500 
   1501  // Size of current denoised buffer and flat_block buffer
   1502  int width;
   1503  int height;
   1504  int y_stride;
   1505  int uv_stride;
   1506  int num_blocks_w;
   1507  int num_blocks_h;
   1508 
   1509  // Buffers for image and noise_psd allocated on the fly
   1510  float *noise_psd[3];
   1511  uint8_t *denoised[3];
   1512  uint8_t *flat_blocks;
   1513 
   1514  aom_flat_block_finder_t flat_block_finder;
   1515  aom_noise_model_t noise_model;
   1516 };
   1517 
   1518 struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
   1519                                                            int block_size,
   1520                                                            float noise_level) {
   1521  struct aom_denoise_and_model_t *ctx =
   1522      (struct aom_denoise_and_model_t *)aom_malloc(
   1523          sizeof(struct aom_denoise_and_model_t));
   1524  if (!ctx) {
   1525    fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
   1526    return NULL;
   1527  }
   1528  memset(ctx, 0, sizeof(*ctx));
   1529 
   1530  ctx->block_size = block_size;
   1531  ctx->noise_level = noise_level;
   1532  ctx->bit_depth = bit_depth;
   1533 
   1534  ctx->noise_psd[0] =
   1535      (float *)aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
   1536  ctx->noise_psd[1] =
   1537      (float *)aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
   1538  ctx->noise_psd[2] =
   1539      (float *)aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
   1540  if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
   1541    fprintf(stderr, "Unable to allocate noise PSD buffers\n");
   1542    aom_denoise_and_model_free(ctx);
   1543    return NULL;
   1544  }
   1545  return ctx;
   1546 }
   1547 
   1548 void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
   1549  aom_free(ctx->flat_blocks);
   1550  for (int i = 0; i < 3; ++i) {
   1551    aom_free(ctx->denoised[i]);
   1552    aom_free(ctx->noise_psd[i]);
   1553  }
   1554  aom_noise_model_free(&ctx->noise_model);
   1555  aom_flat_block_finder_free(&ctx->flat_block_finder);
   1556  aom_free(ctx);
   1557 }
   1558 
   1559 static int denoise_and_model_realloc_if_necessary(
   1560    struct aom_denoise_and_model_t *ctx, const YV12_BUFFER_CONFIG *sd) {
   1561  if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
   1562      ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
   1563    return 1;
   1564  const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
   1565  const int block_size = ctx->block_size;
   1566 
   1567  ctx->width = sd->y_width;
   1568  ctx->height = sd->y_height;
   1569  ctx->y_stride = sd->y_stride;
   1570  ctx->uv_stride = sd->uv_stride;
   1571 
   1572  for (int i = 0; i < 3; ++i) {
   1573    aom_free(ctx->denoised[i]);
   1574    ctx->denoised[i] = NULL;
   1575  }
   1576  aom_free(ctx->flat_blocks);
   1577  ctx->flat_blocks = NULL;
   1578 
   1579  ctx->denoised[0] =
   1580      (uint8_t *)aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
   1581  ctx->denoised[1] =
   1582      (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
   1583  ctx->denoised[2] =
   1584      (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
   1585  if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
   1586    fprintf(stderr, "Unable to allocate denoise buffers\n");
   1587    return 0;
   1588  }
   1589  ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
   1590  ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
   1591  ctx->flat_blocks =
   1592      (uint8_t *)aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
   1593  if (!ctx->flat_blocks) {
   1594    fprintf(stderr, "Unable to allocate flat_blocks buffer\n");
   1595    return 0;
   1596  }
   1597 
   1598  aom_flat_block_finder_free(&ctx->flat_block_finder);
   1599  if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
   1600                                  ctx->bit_depth, use_highbd)) {
   1601    fprintf(stderr, "Unable to init flat block finder\n");
   1602    return 0;
   1603  }
   1604 
   1605  const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
   1606                                            ctx->bit_depth, use_highbd };
   1607  aom_noise_model_free(&ctx->noise_model);
   1608  if (!aom_noise_model_init(&ctx->noise_model, params)) {
   1609    fprintf(stderr, "Unable to init noise model\n");
   1610    return 0;
   1611  }
   1612 
   1613  // Simply use a flat PSD (although we could use the flat blocks to estimate
   1614  // PSD) those to estimate an actual noise PSD)
   1615  const float y_noise_level =
   1616      aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
   1617  const float uv_noise_level = aom_noise_psd_get_default_value(
   1618      ctx->block_size >> sd->subsampling_x, ctx->noise_level);
   1619  for (int i = 0; i < block_size * block_size; ++i) {
   1620    ctx->noise_psd[0][i] = y_noise_level;
   1621    ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
   1622  }
   1623  return 1;
   1624 }
   1625 
   1626 // TODO(aomedia:3151): Handle a monochrome image (sd->u_buffer and sd->v_buffer
   1627 // are null pointers) correctly.
   1628 int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
   1629                              const YV12_BUFFER_CONFIG *sd,
   1630                              aom_film_grain_t *film_grain, int apply_denoise) {
   1631  const int block_size = ctx->block_size;
   1632  const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
   1633  uint8_t *raw_data[3] = {
   1634    use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
   1635    use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
   1636    use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
   1637  };
   1638  const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
   1639  int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
   1640  int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
   1641 
   1642  if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
   1643    fprintf(stderr, "Unable to realloc buffers\n");
   1644    return 0;
   1645  }
   1646 
   1647  aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
   1648                            sd->y_height, strides[0], ctx->flat_blocks);
   1649 
   1650  if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
   1651                             strides, chroma_sub_log2, ctx->noise_psd,
   1652                             block_size, ctx->bit_depth, use_highbd)) {
   1653    fprintf(stderr, "Unable to denoise image\n");
   1654    return 0;
   1655  }
   1656 
   1657  const aom_noise_status_t status = aom_noise_model_update(
   1658      &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
   1659      sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
   1660      block_size);
   1661  int have_noise_estimate = 0;
   1662  if (status == AOM_NOISE_STATUS_OK) {
   1663    have_noise_estimate = 1;
   1664  } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
   1665    aom_noise_model_save_latest(&ctx->noise_model);
   1666    have_noise_estimate = 1;
   1667  } else {
   1668    // Unable to update noise model; proceed if we have a previous estimate.
   1669    have_noise_estimate =
   1670        (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
   1671  }
   1672 
   1673  film_grain->apply_grain = 0;
   1674  if (have_noise_estimate) {
   1675    if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
   1676      fprintf(stderr, "Unable to get grain parameters.\n");
   1677      return 0;
   1678    }
   1679    if (!film_grain->random_seed) {
   1680      film_grain->random_seed = 7391;
   1681    }
   1682    if (apply_denoise) {
   1683      memcpy(raw_data[0], ctx->denoised[0],
   1684             (strides[0] * sd->y_height) << use_highbd);
   1685      if (!sd->monochrome) {
   1686        memcpy(raw_data[1], ctx->denoised[1],
   1687               (strides[1] * sd->uv_height) << use_highbd);
   1688        memcpy(raw_data[2], ctx->denoised[2],
   1689               (strides[2] * sd->uv_height) << use_highbd);
   1690      }
   1691    }
   1692  }
   1693  return 1;
   1694 }