[ tor-browser ].git.dasho

sparse_linear_solver.c (12591B)
      1 /*
      2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 #include "av1/common/av1_common_int.h"
     12 #include "av1/encoder/sparse_linear_solver.h"
     13 #include "config/aom_config.h"
     14 #include "aom_mem/aom_mem.h"
     15 #include "av1/common/alloccommon.h"
     16 
     17 #if CONFIG_OPTICAL_FLOW_API
     18 /*
     19 * Input:
     20 * rows: array of row positions
     21 * cols: array of column positions
     22 * values: array of element values
     23 * num_elem: total number of elements in the matrix
     24 * num_rows: number of rows in the matrix
     25 * num_cols: number of columns in the matrix
     26 *
     27 * Output:
     28 * sm: pointer to the sparse matrix to be initialized
     29 *
     30 * Return: 0  - success
     31 *         -1 - failed
     32 */
     33 int av1_init_sparse_mtx(const int *rows, const int *cols, const double *values,
     34                        int num_elem, int num_rows, int num_cols,
     35                        SPARSE_MTX *sm) {
     36  sm->n_elem = num_elem;
     37  sm->n_rows = num_rows;
     38  sm->n_cols = num_cols;
     39  if (num_elem == 0) {
     40    sm->row_pos = NULL;
     41    sm->col_pos = NULL;
     42    sm->value = NULL;
     43    return 0;
     44  }
     45  sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos));
     46  sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos));
     47  sm->value = aom_calloc(num_elem, sizeof(*sm->value));
     48 
     49  if (!sm->row_pos || !sm->col_pos || !sm->value) {
     50    av1_free_sparse_mtx_elems(sm);
     51    return -1;
     52  }
     53 
     54  memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos));
     55  memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos));
     56  memcpy(sm->value, values, num_elem * sizeof(*sm->value));
     57 
     58  return 0;
     59 }
     60 
     61 /*
     62 * Combines two sparse matrices (allocating new space).
     63 *
     64 * Input:
     65 * sm1, sm2: matrices to be combined
     66 * row_offset1, row_offset2: row offset of each matrix in the new matrix
     67 * col_offset1, col_offset2: column offset of each matrix in the new matrix
     68 * new_n_rows, new_n_cols: number of rows and columns in the new matrix
     69 *
     70 * Output:
     71 * sm: the combined matrix
     72 *
     73 * Return: 0  - success
     74 *         -1 - failed
     75 */
     76 int av1_init_combine_sparse_mtx(const SPARSE_MTX *sm1, const SPARSE_MTX *sm2,
     77                                SPARSE_MTX *sm, int row_offset1,
     78                                int col_offset1, int row_offset2,
     79                                int col_offset2, int new_n_rows,
     80                                int new_n_cols) {
     81  sm->n_elem = sm1->n_elem + sm2->n_elem;
     82  sm->n_cols = new_n_cols;
     83  sm->n_rows = new_n_rows;
     84 
     85  if (sm->n_elem == 0) {
     86    sm->row_pos = NULL;
     87    sm->col_pos = NULL;
     88    sm->value = NULL;
     89    return 0;
     90  }
     91 
     92  sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos));
     93  sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos));
     94  sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value));
     95 
     96  if (!sm->row_pos || !sm->col_pos || !sm->value) {
     97    av1_free_sparse_mtx_elems(sm);
     98    return -1;
     99  }
    100 
    101  for (int i = 0; i < sm1->n_elem; i++) {
    102    sm->row_pos[i] = sm1->row_pos[i] + row_offset1;
    103    sm->col_pos[i] = sm1->col_pos[i] + col_offset1;
    104  }
    105  memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value));
    106  int n_elem1 = sm1->n_elem;
    107  for (int i = 0; i < sm2->n_elem; i++) {
    108    sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2;
    109    sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2;
    110  }
    111  memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value));
    112  return 0;
    113 }
    114 
    115 void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) {
    116  sm->n_cols = 0;
    117  sm->n_rows = 0;
    118  if (sm->n_elem != 0) {
    119    aom_free(sm->row_pos);
    120    aom_free(sm->col_pos);
    121    aom_free(sm->value);
    122  }
    123  sm->n_elem = 0;
    124 }
    125 
    126 /*
    127 * Calculate matrix and vector multiplication: A*b
    128 *
    129 * Input:
    130 * sm: matrix A
    131 * srcv: the vector b to be multiplied to
    132 * dstl: the length of vectors
    133 *
    134 * Output:
    135 * dstv: pointer to the resulting vector
    136 */
    137 void av1_mtx_vect_multi_right(const SPARSE_MTX *sm, const double *srcv,
    138                              double *dstv, int dstl) {
    139  memset(dstv, 0, sizeof(*dstv) * dstl);
    140  for (int i = 0; i < sm->n_elem; i++) {
    141    dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i];
    142  }
    143 }
    144 /*
    145 * Calculate matrix and vector multiplication: b*A
    146 *
    147 * Input:
    148 * sm: matrix A
    149 * srcv: the vector b to be multiplied to
    150 * dstl: the length of vectors
    151 *
    152 * Output:
    153 * dstv: pointer to the resulting vector
    154 */
    155 void av1_mtx_vect_multi_left(const SPARSE_MTX *sm, const double *srcv,
    156                             double *dstv, int dstl) {
    157  memset(dstv, 0, sizeof(*dstv) * dstl);
    158  for (int i = 0; i < sm->n_elem; i++) {
    159    dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i];
    160  }
    161 }
    162 
    163 /*
    164 * Calculate inner product of two vectors
    165 *
    166 * Input:
    167 * src1, scr2: the vectors to be multiplied
    168 * src1l: length of the vectors
    169 *
    170 * Output:
    171 * the inner product
    172 */
    173 double av1_vect_vect_multi(const double *src1, int src1l, const double *src2) {
    174  double result = 0;
    175  for (int i = 0; i < src1l; i++) {
    176    result += src1[i] * src2[i];
    177  }
    178  return result;
    179 }
    180 
    181 /*
    182 * Multiply each element in the matrix sm with a constant c
    183 */
    184 void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) {
    185  for (int i = 0; i < sm->n_elem; i++) {
    186    sm->value[i] *= c;
    187  }
    188 }
    189 
    190 static inline void free_solver_local_buf(double *buf1, double *buf2,
    191                                         double *buf3, double *buf4,
    192                                         double *buf5, double *buf6,
    193                                         double *buf7) {
    194  aom_free(buf1);
    195  aom_free(buf2);
    196  aom_free(buf3);
    197  aom_free(buf4);
    198  aom_free(buf5);
    199  aom_free(buf6);
    200  aom_free(buf7);
    201 }
    202 
    203 /*
    204 * Solve for Ax = b
    205 * no requirement on A
    206 *
    207 * Input:
    208 * A: the sparse matrix
    209 * b: the vector b
    210 * bl: length of b
    211 * x: the vector x
    212 *
    213 * Output:
    214 * x: pointer to the solution vector
    215 *
    216 * Return: 0  - success
    217 *         -1 - failed
    218 */
    219 int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b,
    220                                     int bl, double *x) {
    221  double *r = NULL, *r_hat = NULL, *p = NULL, *p_hat = NULL, *Ap = NULL,
    222         *p_hatA = NULL, *x_hat = NULL;
    223  double alpha, beta, rtr, r_norm_2;
    224  double denormtemp;
    225 
    226  // initialize
    227  r = aom_calloc(bl, sizeof(*r));
    228  r_hat = aom_calloc(bl, sizeof(*r_hat));
    229  p = aom_calloc(bl, sizeof(*p));
    230  p_hat = aom_calloc(bl, sizeof(*p_hat));
    231  Ap = aom_calloc(bl, sizeof(*Ap));
    232  p_hatA = aom_calloc(bl, sizeof(*p_hatA));
    233  x_hat = aom_calloc(bl, sizeof(*x_hat));
    234  if (!r || !r_hat || !p || !p_hat || !Ap || !p_hatA || !x_hat) {
    235    free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
    236    return -1;
    237  }
    238 
    239  int i;
    240  for (i = 0; i < bl; i++) {
    241    r[i] = b[i];
    242    r_hat[i] = b[i];
    243    p[i] = r[i];
    244    p_hat[i] = r_hat[i];
    245    x[i] = 0;
    246    x_hat[i] = 0;
    247  }
    248  r_norm_2 = av1_vect_vect_multi(r_hat, bl, r);
    249  for (int k = 0; k < MAX_CG_SP_ITER; k++) {
    250    rtr = r_norm_2;
    251    av1_mtx_vect_multi_right(A, p, Ap, bl);
    252    av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl);
    253 
    254    denormtemp = av1_vect_vect_multi(p_hat, bl, Ap);
    255    if (denormtemp < 1e-10) break;
    256    alpha = rtr / denormtemp;
    257    r_norm_2 = 0;
    258    for (i = 0; i < bl; i++) {
    259      x[i] += alpha * p[i];
    260      x_hat[i] += alpha * p_hat[i];
    261      r[i] -= alpha * Ap[i];
    262      r_hat[i] -= alpha * p_hatA[i];
    263      r_norm_2 += r_hat[i] * r[i];
    264    }
    265    if (sqrt(r_norm_2) < 1e-2) {
    266      break;
    267    }
    268    if (rtr < 1e-10) break;
    269    beta = r_norm_2 / rtr;
    270    for (i = 0; i < bl; i++) {
    271      p[i] = r[i] + beta * p[i];
    272      p_hat[i] = r_hat[i] + beta * p_hat[i];
    273    }
    274  }
    275  // free
    276  free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
    277  return 0;
    278 }
    279 
    280 /*
    281 * Solve for Ax = b when A is symmetric and positive definite
    282 *
    283 * Input:
    284 * A: the sparse matrix
    285 * b: the vector b
    286 * bl: length of b
    287 * x: the vector x
    288 *
    289 * Output:
    290 * x: pointer to the solution vector
    291 *
    292 * Return: 0  - success
    293 *         -1 - failed
    294 */
    295 int av1_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, int bl,
    296                                  double *x) {
    297  double *r = NULL, *p = NULL, *Ap = NULL;
    298  double alpha, beta, rtr, r_norm_2;
    299  double denormtemp;
    300 
    301  // initialize
    302  r = aom_calloc(bl, sizeof(*r));
    303  p = aom_calloc(bl, sizeof(*p));
    304  Ap = aom_calloc(bl, sizeof(*Ap));
    305  if (!r || !p || !Ap) {
    306    free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
    307    return -1;
    308  }
    309 
    310  int i;
    311  for (i = 0; i < bl; i++) {
    312    r[i] = b[i];
    313    p[i] = r[i];
    314    x[i] = 0;
    315  }
    316  r_norm_2 = av1_vect_vect_multi(r, bl, r);
    317  int k;
    318  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    319    rtr = r_norm_2;
    320    av1_mtx_vect_multi_right(A, p, Ap, bl);
    321    denormtemp = av1_vect_vect_multi(p, bl, Ap);
    322    if (denormtemp < 1e-10) break;
    323    alpha = rtr / denormtemp;
    324    r_norm_2 = 0;
    325    for (i = 0; i < bl; i++) {
    326      x[i] += alpha * p[i];
    327      r[i] -= alpha * Ap[i];
    328      r_norm_2 += r[i] * r[i];
    329    }
    330    if (r_norm_2 < 1e-8 * bl) break;
    331    if (rtr < 1e-10) break;
    332    beta = r_norm_2 / rtr;
    333    for (i = 0; i < bl; i++) {
    334      p[i] = r[i] + beta * p[i];
    335    }
    336  }
    337  // free
    338  free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
    339 
    340  return 0;
    341 }
    342 
    343 /*
    344 * Solve for Ax = b using Jacobi method
    345 *
    346 * Input:
    347 * A: the sparse matrix
    348 * b: the vector b
    349 * bl: length of b
    350 * x: the vector x
    351 *
    352 * Output:
    353 * x: pointer to the solution vector
    354 *
    355 * Return: 0  - success
    356 *         -1 - failed
    357 */
    358 int av1_jacobi_sparse(const SPARSE_MTX *A, const double *b, int bl, double *x) {
    359  double *diags = NULL, *Rx = NULL, *x_last = NULL, *x_cur = NULL,
    360         *tempx = NULL;
    361  double resi2;
    362 
    363  diags = aom_calloc(bl, sizeof(*diags));
    364  Rx = aom_calloc(bl, sizeof(*Rx));
    365  x_last = aom_calloc(bl, sizeof(*x_last));
    366  x_cur = aom_calloc(bl, sizeof(*x_cur));
    367 
    368  if (!diags || !Rx || !x_last || !x_cur) {
    369    free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
    370    return -1;
    371  }
    372 
    373  int i;
    374  memset(x_last, 0, sizeof(*x_last) * bl);
    375  // get the diagonals of A
    376  memset(diags, 0, sizeof(*diags) * bl);
    377  for (int c = 0; c < A->n_elem; c++) {
    378    if (A->row_pos[c] != A->col_pos[c]) continue;
    379    diags[A->row_pos[c]] = A->value[c];
    380  }
    381  int k;
    382  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    383    // R = A - diag(diags)
    384    // get R*x_last
    385    memset(Rx, 0, sizeof(*Rx) * bl);
    386    for (int c = 0; c < A->n_elem; c++) {
    387      if (A->row_pos[c] == A->col_pos[c]) continue;
    388      Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c];
    389    }
    390    resi2 = 0;
    391    for (i = 0; i < bl; i++) {
    392      x_cur[i] = (b[i] - Rx[i]) / diags[i];
    393      resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]);
    394    }
    395    if (resi2 <= 1e-10 * bl) break;
    396    // swap last & cur buffer ptrs
    397    tempx = x_last;
    398    x_last = x_cur;
    399    x_cur = tempx;
    400  }
    401  printf("\n numiter: %d\n", k);
    402  for (i = 0; i < bl; i++) {
    403    x[i] = x_cur[i];
    404  }
    405  free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
    406  return 0;
    407 }
    408 
    409 /*
    410 * Solve for Ax = b using Steepest descent method
    411 *
    412 * Input:
    413 * A: the sparse matrix
    414 * b: the vector b
    415 * bl: length of b
    416 * x: the vector x
    417 *
    418 * Output:
    419 * x: pointer to the solution vector
    420 *
    421 * Return: 0  - success
    422 *         -1 - failed
    423 */
    424 int av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
    425                                double *x) {
    426  double *d = NULL, *Ad = NULL, *Ax = NULL;
    427  double resi2, resi2_last, dAd, temp;
    428 
    429  d = aom_calloc(bl, sizeof(*d));
    430  Ax = aom_calloc(bl, sizeof(*Ax));
    431  Ad = aom_calloc(bl, sizeof(*Ad));
    432 
    433  if (!d || !Ax || !Ad) {
    434    free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
    435    return -1;
    436  }
    437 
    438  int i;
    439  // initialize with 0s
    440  resi2 = 0;
    441  for (i = 0; i < bl; i++) {
    442    x[i] = 0;
    443    d[i] = b[i];
    444    resi2 += d[i] * d[i] / bl;
    445  }
    446  int k;
    447  for (k = 0; k < MAX_CG_SP_ITER; k++) {
    448    // get A*x_last
    449    av1_mtx_vect_multi_right(A, d, Ad, bl);
    450    dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad);
    451    for (i = 0; i < bl; i++) {
    452      temp = dAd * d[i];
    453      x[i] = x[i] + temp;
    454    }
    455    av1_mtx_vect_multi_right(A, x, Ax, bl);
    456    resi2_last = resi2;
    457    resi2 = 0;
    458    for (i = 0; i < bl; i++) {
    459      d[i] = b[i] - Ax[i];
    460      resi2 += d[i] * d[i] / bl;
    461    }
    462    if (resi2 <= 1e-8) break;
    463    if (resi2_last - resi2 < 1e-8) {
    464      break;
    465    }
    466  }
    467  free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
    468 
    469  return 0;
    470 }
    471 
    472 #endif  // CONFIG_OPTICAL_FLOW_API
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE