sparse_linear_solver.c (12591B)
1 /* 2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #include "av1/common/av1_common_int.h" 12 #include "av1/encoder/sparse_linear_solver.h" 13 #include "config/aom_config.h" 14 #include "aom_mem/aom_mem.h" 15 #include "av1/common/alloccommon.h" 16 17 #if CONFIG_OPTICAL_FLOW_API 18 /* 19 * Input: 20 * rows: array of row positions 21 * cols: array of column positions 22 * values: array of element values 23 * num_elem: total number of elements in the matrix 24 * num_rows: number of rows in the matrix 25 * num_cols: number of columns in the matrix 26 * 27 * Output: 28 * sm: pointer to the sparse matrix to be initialized 29 * 30 * Return: 0 - success 31 * -1 - failed 32 */ 33 int av1_init_sparse_mtx(const int *rows, const int *cols, const double *values, 34 int num_elem, int num_rows, int num_cols, 35 SPARSE_MTX *sm) { 36 sm->n_elem = num_elem; 37 sm->n_rows = num_rows; 38 sm->n_cols = num_cols; 39 if (num_elem == 0) { 40 sm->row_pos = NULL; 41 sm->col_pos = NULL; 42 sm->value = NULL; 43 return 0; 44 } 45 sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos)); 46 sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos)); 47 sm->value = aom_calloc(num_elem, sizeof(*sm->value)); 48 49 if (!sm->row_pos || !sm->col_pos || !sm->value) { 50 av1_free_sparse_mtx_elems(sm); 51 return -1; 52 } 53 54 memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos)); 55 memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos)); 56 memcpy(sm->value, values, num_elem * sizeof(*sm->value)); 57 58 return 0; 59 } 60 61 /* 62 * Combines two sparse matrices (allocating new space). 63 * 64 * Input: 65 * sm1, sm2: matrices to be combined 66 * row_offset1, row_offset2: row offset of each matrix in the new matrix 67 * col_offset1, col_offset2: column offset of each matrix in the new matrix 68 * new_n_rows, new_n_cols: number of rows and columns in the new matrix 69 * 70 * Output: 71 * sm: the combined matrix 72 * 73 * Return: 0 - success 74 * -1 - failed 75 */ 76 int av1_init_combine_sparse_mtx(const SPARSE_MTX *sm1, const SPARSE_MTX *sm2, 77 SPARSE_MTX *sm, int row_offset1, 78 int col_offset1, int row_offset2, 79 int col_offset2, int new_n_rows, 80 int new_n_cols) { 81 sm->n_elem = sm1->n_elem + sm2->n_elem; 82 sm->n_cols = new_n_cols; 83 sm->n_rows = new_n_rows; 84 85 if (sm->n_elem == 0) { 86 sm->row_pos = NULL; 87 sm->col_pos = NULL; 88 sm->value = NULL; 89 return 0; 90 } 91 92 sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos)); 93 sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos)); 94 sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value)); 95 96 if (!sm->row_pos || !sm->col_pos || !sm->value) { 97 av1_free_sparse_mtx_elems(sm); 98 return -1; 99 } 100 101 for (int i = 0; i < sm1->n_elem; i++) { 102 sm->row_pos[i] = sm1->row_pos[i] + row_offset1; 103 sm->col_pos[i] = sm1->col_pos[i] + col_offset1; 104 } 105 memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value)); 106 int n_elem1 = sm1->n_elem; 107 for (int i = 0; i < sm2->n_elem; i++) { 108 sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2; 109 sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2; 110 } 111 memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value)); 112 return 0; 113 } 114 115 void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) { 116 sm->n_cols = 0; 117 sm->n_rows = 0; 118 if (sm->n_elem != 0) { 119 aom_free(sm->row_pos); 120 aom_free(sm->col_pos); 121 aom_free(sm->value); 122 } 123 sm->n_elem = 0; 124 } 125 126 /* 127 * Calculate matrix and vector multiplication: A*b 128 * 129 * Input: 130 * sm: matrix A 131 * srcv: the vector b to be multiplied to 132 * dstl: the length of vectors 133 * 134 * Output: 135 * dstv: pointer to the resulting vector 136 */ 137 void av1_mtx_vect_multi_right(const SPARSE_MTX *sm, const double *srcv, 138 double *dstv, int dstl) { 139 memset(dstv, 0, sizeof(*dstv) * dstl); 140 for (int i = 0; i < sm->n_elem; i++) { 141 dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i]; 142 } 143 } 144 /* 145 * Calculate matrix and vector multiplication: b*A 146 * 147 * Input: 148 * sm: matrix A 149 * srcv: the vector b to be multiplied to 150 * dstl: the length of vectors 151 * 152 * Output: 153 * dstv: pointer to the resulting vector 154 */ 155 void av1_mtx_vect_multi_left(const SPARSE_MTX *sm, const double *srcv, 156 double *dstv, int dstl) { 157 memset(dstv, 0, sizeof(*dstv) * dstl); 158 for (int i = 0; i < sm->n_elem; i++) { 159 dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i]; 160 } 161 } 162 163 /* 164 * Calculate inner product of two vectors 165 * 166 * Input: 167 * src1, scr2: the vectors to be multiplied 168 * src1l: length of the vectors 169 * 170 * Output: 171 * the inner product 172 */ 173 double av1_vect_vect_multi(const double *src1, int src1l, const double *src2) { 174 double result = 0; 175 for (int i = 0; i < src1l; i++) { 176 result += src1[i] * src2[i]; 177 } 178 return result; 179 } 180 181 /* 182 * Multiply each element in the matrix sm with a constant c 183 */ 184 void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) { 185 for (int i = 0; i < sm->n_elem; i++) { 186 sm->value[i] *= c; 187 } 188 } 189 190 static inline void free_solver_local_buf(double *buf1, double *buf2, 191 double *buf3, double *buf4, 192 double *buf5, double *buf6, 193 double *buf7) { 194 aom_free(buf1); 195 aom_free(buf2); 196 aom_free(buf3); 197 aom_free(buf4); 198 aom_free(buf5); 199 aom_free(buf6); 200 aom_free(buf7); 201 } 202 203 /* 204 * Solve for Ax = b 205 * no requirement on A 206 * 207 * Input: 208 * A: the sparse matrix 209 * b: the vector b 210 * bl: length of b 211 * x: the vector x 212 * 213 * Output: 214 * x: pointer to the solution vector 215 * 216 * Return: 0 - success 217 * -1 - failed 218 */ 219 int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, 220 int bl, double *x) { 221 double *r = NULL, *r_hat = NULL, *p = NULL, *p_hat = NULL, *Ap = NULL, 222 *p_hatA = NULL, *x_hat = NULL; 223 double alpha, beta, rtr, r_norm_2; 224 double denormtemp; 225 226 // initialize 227 r = aom_calloc(bl, sizeof(*r)); 228 r_hat = aom_calloc(bl, sizeof(*r_hat)); 229 p = aom_calloc(bl, sizeof(*p)); 230 p_hat = aom_calloc(bl, sizeof(*p_hat)); 231 Ap = aom_calloc(bl, sizeof(*Ap)); 232 p_hatA = aom_calloc(bl, sizeof(*p_hatA)); 233 x_hat = aom_calloc(bl, sizeof(*x_hat)); 234 if (!r || !r_hat || !p || !p_hat || !Ap || !p_hatA || !x_hat) { 235 free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat); 236 return -1; 237 } 238 239 int i; 240 for (i = 0; i < bl; i++) { 241 r[i] = b[i]; 242 r_hat[i] = b[i]; 243 p[i] = r[i]; 244 p_hat[i] = r_hat[i]; 245 x[i] = 0; 246 x_hat[i] = 0; 247 } 248 r_norm_2 = av1_vect_vect_multi(r_hat, bl, r); 249 for (int k = 0; k < MAX_CG_SP_ITER; k++) { 250 rtr = r_norm_2; 251 av1_mtx_vect_multi_right(A, p, Ap, bl); 252 av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl); 253 254 denormtemp = av1_vect_vect_multi(p_hat, bl, Ap); 255 if (denormtemp < 1e-10) break; 256 alpha = rtr / denormtemp; 257 r_norm_2 = 0; 258 for (i = 0; i < bl; i++) { 259 x[i] += alpha * p[i]; 260 x_hat[i] += alpha * p_hat[i]; 261 r[i] -= alpha * Ap[i]; 262 r_hat[i] -= alpha * p_hatA[i]; 263 r_norm_2 += r_hat[i] * r[i]; 264 } 265 if (sqrt(r_norm_2) < 1e-2) { 266 break; 267 } 268 if (rtr < 1e-10) break; 269 beta = r_norm_2 / rtr; 270 for (i = 0; i < bl; i++) { 271 p[i] = r[i] + beta * p[i]; 272 p_hat[i] = r_hat[i] + beta * p_hat[i]; 273 } 274 } 275 // free 276 free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat); 277 return 0; 278 } 279 280 /* 281 * Solve for Ax = b when A is symmetric and positive definite 282 * 283 * Input: 284 * A: the sparse matrix 285 * b: the vector b 286 * bl: length of b 287 * x: the vector x 288 * 289 * Output: 290 * x: pointer to the solution vector 291 * 292 * Return: 0 - success 293 * -1 - failed 294 */ 295 int av1_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, int bl, 296 double *x) { 297 double *r = NULL, *p = NULL, *Ap = NULL; 298 double alpha, beta, rtr, r_norm_2; 299 double denormtemp; 300 301 // initialize 302 r = aom_calloc(bl, sizeof(*r)); 303 p = aom_calloc(bl, sizeof(*p)); 304 Ap = aom_calloc(bl, sizeof(*Ap)); 305 if (!r || !p || !Ap) { 306 free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL); 307 return -1; 308 } 309 310 int i; 311 for (i = 0; i < bl; i++) { 312 r[i] = b[i]; 313 p[i] = r[i]; 314 x[i] = 0; 315 } 316 r_norm_2 = av1_vect_vect_multi(r, bl, r); 317 int k; 318 for (k = 0; k < MAX_CG_SP_ITER; k++) { 319 rtr = r_norm_2; 320 av1_mtx_vect_multi_right(A, p, Ap, bl); 321 denormtemp = av1_vect_vect_multi(p, bl, Ap); 322 if (denormtemp < 1e-10) break; 323 alpha = rtr / denormtemp; 324 r_norm_2 = 0; 325 for (i = 0; i < bl; i++) { 326 x[i] += alpha * p[i]; 327 r[i] -= alpha * Ap[i]; 328 r_norm_2 += r[i] * r[i]; 329 } 330 if (r_norm_2 < 1e-8 * bl) break; 331 if (rtr < 1e-10) break; 332 beta = r_norm_2 / rtr; 333 for (i = 0; i < bl; i++) { 334 p[i] = r[i] + beta * p[i]; 335 } 336 } 337 // free 338 free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL); 339 340 return 0; 341 } 342 343 /* 344 * Solve for Ax = b using Jacobi method 345 * 346 * Input: 347 * A: the sparse matrix 348 * b: the vector b 349 * bl: length of b 350 * x: the vector x 351 * 352 * Output: 353 * x: pointer to the solution vector 354 * 355 * Return: 0 - success 356 * -1 - failed 357 */ 358 int av1_jacobi_sparse(const SPARSE_MTX *A, const double *b, int bl, double *x) { 359 double *diags = NULL, *Rx = NULL, *x_last = NULL, *x_cur = NULL, 360 *tempx = NULL; 361 double resi2; 362 363 diags = aom_calloc(bl, sizeof(*diags)); 364 Rx = aom_calloc(bl, sizeof(*Rx)); 365 x_last = aom_calloc(bl, sizeof(*x_last)); 366 x_cur = aom_calloc(bl, sizeof(*x_cur)); 367 368 if (!diags || !Rx || !x_last || !x_cur) { 369 free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL); 370 return -1; 371 } 372 373 int i; 374 memset(x_last, 0, sizeof(*x_last) * bl); 375 // get the diagonals of A 376 memset(diags, 0, sizeof(*diags) * bl); 377 for (int c = 0; c < A->n_elem; c++) { 378 if (A->row_pos[c] != A->col_pos[c]) continue; 379 diags[A->row_pos[c]] = A->value[c]; 380 } 381 int k; 382 for (k = 0; k < MAX_CG_SP_ITER; k++) { 383 // R = A - diag(diags) 384 // get R*x_last 385 memset(Rx, 0, sizeof(*Rx) * bl); 386 for (int c = 0; c < A->n_elem; c++) { 387 if (A->row_pos[c] == A->col_pos[c]) continue; 388 Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c]; 389 } 390 resi2 = 0; 391 for (i = 0; i < bl; i++) { 392 x_cur[i] = (b[i] - Rx[i]) / diags[i]; 393 resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]); 394 } 395 if (resi2 <= 1e-10 * bl) break; 396 // swap last & cur buffer ptrs 397 tempx = x_last; 398 x_last = x_cur; 399 x_cur = tempx; 400 } 401 printf("\n numiter: %d\n", k); 402 for (i = 0; i < bl; i++) { 403 x[i] = x_cur[i]; 404 } 405 free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL); 406 return 0; 407 } 408 409 /* 410 * Solve for Ax = b using Steepest descent method 411 * 412 * Input: 413 * A: the sparse matrix 414 * b: the vector b 415 * bl: length of b 416 * x: the vector x 417 * 418 * Output: 419 * x: pointer to the solution vector 420 * 421 * Return: 0 - success 422 * -1 - failed 423 */ 424 int av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl, 425 double *x) { 426 double *d = NULL, *Ad = NULL, *Ax = NULL; 427 double resi2, resi2_last, dAd, temp; 428 429 d = aom_calloc(bl, sizeof(*d)); 430 Ax = aom_calloc(bl, sizeof(*Ax)); 431 Ad = aom_calloc(bl, sizeof(*Ad)); 432 433 if (!d || !Ax || !Ad) { 434 free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL); 435 return -1; 436 } 437 438 int i; 439 // initialize with 0s 440 resi2 = 0; 441 for (i = 0; i < bl; i++) { 442 x[i] = 0; 443 d[i] = b[i]; 444 resi2 += d[i] * d[i] / bl; 445 } 446 int k; 447 for (k = 0; k < MAX_CG_SP_ITER; k++) { 448 // get A*x_last 449 av1_mtx_vect_multi_right(A, d, Ad, bl); 450 dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad); 451 for (i = 0; i < bl; i++) { 452 temp = dAd * d[i]; 453 x[i] = x[i] + temp; 454 } 455 av1_mtx_vect_multi_right(A, x, Ax, bl); 456 resi2_last = resi2; 457 resi2 = 0; 458 for (i = 0; i < bl; i++) { 459 d[i] = b[i] - Ax[i]; 460 resi2 += d[i] * d[i] / bl; 461 } 462 if (resi2 <= 1e-8) break; 463 if (resi2_last - resi2 < 1e-8) { 464 break; 465 } 466 } 467 free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL); 468 469 return 0; 470 } 471 472 #endif // CONFIG_OPTICAL_FLOW_API