txb_rdopt.c (27466B)
/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "av1/encoder/txb_rdopt.h"
#include "av1/encoder/txb_rdopt_utils.h"

#include "aom_ports/mem.h"
#include "av1/common/idct.h"

// Rate-distortion decision for the coefficient at scan index `si`: keep the
// current quantized level, or lower its magnitude by one, whichever has the
// smaller RDCOST.
//
// - A zero coefficient only adds base_cost[coeff_ctx][0] to *accu_rate.
// - For a non-zero coefficient the chosen candidate's rate is added to
//   *accu_rate and its distortion, expressed relative to zeroing the
//   coefficient (dist - dist0), is added to *accu_dist.
// - When the lower level wins, qcoeff[ci], dqcoeff[ci] and the padded
//   `levels` entry are overwritten in place.
//
// Unlike update_coeff_simple(), this version derives `is_last` from
// si == eob - 1 and forwards it and dc_sign_ctx to the context/cost helpers,
// and it is the one the caller uses for the last and the DC position.
static inline void update_coeff_general(
    int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size,
    TX_CLASS tx_class, int bhl, int width, int64_t rdmult, int shift,
    int dc_sign_ctx, const int16_t *dequant, const int16_t *scan,
    const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
    tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels,
    const qm_val_t *iqmatrix, const qm_val_t *qmatrix) {
  const int dqv = get_dqv(dequant, scan[si], iqmatrix);
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const int is_last = si == (eob - 1);
  const int coeff_ctx = get_lower_levels_ctx_general(
      is_last, si, bhl, width, levels, ci, tx_size, tx_class);
  if (qc == 0) {
    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
  } else {
    const int sign = (qc < 0) ? 1 : 0;
    const tran_low_t abs_qc = abs(qc);
    const tran_low_t tqc = tcoeff[ci];
    const tran_low_t dqc = dqcoeff[ci];
    // dist: distortion of the current level; dist0: distortion if the
    // coefficient were reconstructed as zero.
    const int64_t dist = get_coeff_dist(tqc, dqc, shift, qmatrix, ci);
    const int64_t dist0 = get_coeff_dist(tqc, 0, shift, qmatrix, ci);
    const int rate =
        get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx,
                               dc_sign_ctx, txb_costs, bhl, tx_class, levels);
    const int64_t rd = RDCOST(rdmult, rate, dist);

    // Candidate with the magnitude lowered by one.
    tran_low_t qc_low, dqc_low;
    tran_low_t abs_qc_low;
    int64_t dist_low, rd_low;
    int rate_low;
    if (abs_qc == 1) {
      // Lowering a +/-1 coefficient zeroes it.
      abs_qc_low = qc_low = dqc_low = 0;
      dist_low = dist0;
      rate_low = txb_costs->base_cost[coeff_ctx][0];
    } else {
      // NOTE(review): get_qc_dqc_low() is defined elsewhere; presumably it
      // fills qc_low/dqc_low with the signed quantized/dequantized values for
      // magnitude abs_qc - 1 -- confirm against txb_rdopt_utils.h.
      get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
      abs_qc_low = abs_qc - 1;
      dist_low = get_coeff_dist(tqc, dqc_low, shift, qmatrix, ci);
      rate_low =
          get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx,
                                 dc_sign_ctx, txb_costs, bhl, tx_class, levels);
    }

    rd_low = RDCOST(rdmult, rate_low, dist_low);
    if (rd_low < rd) {
      qcoeff[ci] = qc_low;
      dqcoeff[ci] = dqc_low;
      // The stored level is clamped to INT8_MAX.
      levels[get_padded_idx(ci, bhl)] = AOMMIN(abs_qc_low, INT8_MAX);
      *accu_rate += rate_low;
      *accu_dist += dist_low - dist0;
    } else {
      *accu_rate += rate;
      *accu_dist += dist - dist0;
    }
  }
}

// Reduced version of update_coeff_general() for interior scan positions
// (asserted: si > 0 and si != eob - 1). Only the rate is accumulated; there is
// no distortion accumulator.
//
// - A zero coefficient adds base_cost[coeff_ctx][0] to *accu_rate.
// - If the dequantized magnitude is already below the transform coefficient
//   magnitude, the current level is kept without evaluating the lower one.
// - Otherwise the current level and (abs_qc - 1) are compared by RDCOST using
//   magnitudes only; the sign is re-applied when the lower level is stored.
// - When `sharpness` is non-zero, lowering is only permitted for abs_qc > 1,
//   i.e. a +/-1 coefficient is never zeroed here.
static AOM_FORCE_INLINE void update_coeff_simple(
    int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class,
    int bhl, int64_t rdmult, int shift, const int16_t *dequant,
    const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs,
    const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
    uint8_t *levels, int sharpness, const qm_val_t *iqmatrix,
    const qm_val_t *qmatrix) {
  const int dqv = get_dqv(dequant, scan[si], iqmatrix);
  // eob is only read by the asserts below; the cast silences the
  // unused-parameter warning when asserts are compiled out.
  (void)eob;
  // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
  // and not the last (scan_idx != eob - 1)
  assert(si != eob - 1);
  assert(si > 0);
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const int coeff_ctx =
      get_lower_levels_ctx(levels, ci, bhl, tx_size, tx_class);
  if (qc == 0) {
    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
  } else {
    const tran_low_t abs_qc = abs(qc);
    const tran_low_t abs_tqc = abs(tcoeff[ci]);
    const tran_low_t abs_dqc = abs(dqcoeff[ci]);
    // NOTE(review): get_two_coeff_cost_simple() is defined elsewhere; it
    // returns `rate` and writes `rate_low`, presumably the costs of abs_qc and
    // abs_qc - 1 respectively -- confirm.
    int rate_low = 0;
    const int rate = get_two_coeff_cost_simple(
        ci, abs_qc, coeff_ctx, txb_costs, bhl, tx_class, levels, &rate_low);
    // Reconstruction already undershoots the source magnitude: keep the level.
    if (abs_dqc < abs_tqc) {
      *accu_rate += rate;
      return;
    }

    const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift, qmatrix, ci);
    const int64_t rd = RDCOST(rdmult, rate, dist);

    const tran_low_t abs_qc_low = abs_qc - 1;
    const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift;
    const int64_t dist_low =
        get_coeff_dist(abs_tqc, abs_dqc_low, shift, qmatrix, ci);
    const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low);

    int allow_lower_qc = sharpness ? (abs_qc > 1) : 1;

    if (rd_low < rd && allow_lower_qc) {
      const int sign = (qc < 0) ? 1 : 0;
      // Branchless sign restore: for sign == 1, (-1 ^ v) + 1 == -v; for
      // sign == 0 the value is unchanged.
      qcoeff[ci] = (-sign ^ abs_qc_low) + sign;
      dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign;
      levels[get_padded_idx(ci, bhl)] = AOMMIN(abs_qc_low, INT8_MAX);
      *accu_rate += rate_low;
    } else {
      *accu_rate += rate;
    }
  }
}

// RD decision for a non-last coefficient (asserted: si != *eob - 1) that also
// considers moving the end of block to this position.
//
// For a non-zero coefficient, up to four candidates are evaluated:
//   1. keep the level, with the current eob (rd),
//   2. lower the magnitude by one, with the current eob (rd_low),
//   3. keep the level and make si + 1 the new eob (rd_new_eob),
//   4. lower the magnitude by one (only if it stays non-zero) and make si + 1
//      the new eob.
// Candidates 1/2 are costed on top of the running totals *accu_rate and
// *accu_dist; candidates 3/4 are costed from scratch (new eob cost plus this
// coefficient only), because everything after `si` would be dropped.
//
// nz_ci[0 .. *nz_num - 1] holds the positions of the non-zero coefficients
// recorded so far. When a new eob is chosen those positions are zeroed in
// qcoeff/dqcoeff/levels, *eob is set to si + 1, *nz_num is reset and the
// accumulators restart from this coefficient. If the coefficient at `ci` is
// still non-zero on exit it is appended to nz_ci.
//
// `sharpness` gating, as coded below: when non-zero, lowering under the
// current eob requires abs_qc > 2 for si <= 5 and abs_qc > 1 otherwise, and a
// new eob is only accepted when new_eob >= 5.
static AOM_FORCE_INLINE void update_coeff_eob(
    int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
    int si, TX_SIZE tx_size, TX_CLASS tx_class, int bhl, int width,
    int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant,
    const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs,
    const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
    tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness,
    const qm_val_t *iqmatrix, const qm_val_t *qmatrix) {
  const int dqv = get_dqv(dequant, scan[si], iqmatrix);
  assert(si != *eob - 1);
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const int coeff_ctx =
      get_lower_levels_ctx(levels, ci, bhl, tx_size, tx_class);
  if (qc == 0) {
    *accu_rate += txb_costs->base_cost[coeff_ctx][0];
  } else {
    int lower_level = 0;
    const tran_low_t abs_qc = abs(qc);
    const tran_low_t tqc = tcoeff[ci];
    const tran_low_t dqc = dqcoeff[ci];
    const int sign = (qc < 0) ? 1 : 0;
    // All distortions in this function are deltas against zeroing the
    // coefficient (dist0 is subtracted).
    const int64_t dist0 = get_coeff_dist(tqc, 0, shift, qmatrix, ci);
    int64_t dist = get_coeff_dist(tqc, dqc, shift, qmatrix, ci) - dist0;
    int rate =
        get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx,
                               txb_costs, bhl, tx_class, levels);
    // Candidate 1: keep the level, current eob.
    int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);

    tran_low_t qc_low, dqc_low;
    tran_low_t abs_qc_low;
    int64_t dist_low, rd_low;
    int rate_low;

    // Candidate 2: lower the magnitude by one, current eob.
    if (abs_qc == 1) {
      abs_qc_low = 0;
      dqc_low = qc_low = 0;
      dist_low = 0;
      rate_low = txb_costs->base_cost[coeff_ctx][0];
      rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
    } else {
      get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
      abs_qc_low = abs_qc - 1;
      dist_low = get_coeff_dist(tqc, dqc_low, shift, qmatrix, ci) - dist0;
      rate_low =
          get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx,
                                 dc_sign_ctx, txb_costs, bhl, tx_class, levels);
      rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
    }

    // Candidate 3: this coefficient becomes the last one (eob = si + 1).
    int lower_level_new_eob = 0;
    const int new_eob = si + 1;
    const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bhl, width, si);
    const int new_eob_cost =
        get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
    int rate_coeff_eob =
        new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
                                          dc_sign_ctx, txb_costs, bhl,
                                          tx_class);
    int64_t dist_new_eob = dist;
    int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);

    // Candidate 4: new eob with the lowered level. Skipped when the lowered
    // level is zero, since the coefficient at the eob position must be
    // non-zero.
    if (abs_qc_low > 0) {
      const int rate_coeff_eob_low =
          new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign,
                                            coeff_ctx_new_eob, dc_sign_ctx,
                                            txb_costs, bhl, tx_class);
      const int64_t dist_new_eob_low = dist_low;
      const int64_t rd_new_eob_low =
          RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low);
      if (rd_new_eob_low < rd_new_eob) {
        lower_level_new_eob = 1;
        rd_new_eob = rd_new_eob_low;
        rate_coeff_eob = rate_coeff_eob_low;
        dist_new_eob = dist_new_eob_low;
      }
    }

    // With sharpness enabled, require a larger magnitude before lowering is
    // allowed, with a stricter threshold for the first six scan positions.
    const int qc_threshold = (si <= 5) ? 2 : 1;
    const int allow_lower_qc = sharpness ? abs_qc > qc_threshold : 1;

    if (allow_lower_qc) {
      if (rd_low < rd) {
        lower_level = 1;
        rd = rd_low;
        rate = rate_low;
        dist = dist_low;
      }
    }

    if ((sharpness == 0 || new_eob >= 5) && rd_new_eob < rd) {
      // Truncate: clear every non-zero coefficient recorded so far and restart
      // the accumulators from this coefficient.
      for (int ni = 0; ni < *nz_num; ++ni) {
        int last_ci = nz_ci[ni];
        levels[get_padded_idx(last_ci, bhl)] = 0;
        qcoeff[last_ci] = 0;
        dqcoeff[last_ci] = 0;
      }
      *eob = new_eob;
      *nz_num = 0;
      *accu_rate = rate_coeff_eob;
      *accu_dist = dist_new_eob;
      lower_level = lower_level_new_eob;
    } else {
      *accu_rate += rate;
      *accu_dist += dist;
    }

    if (lower_level) {
      qcoeff[ci] = qc_low;
      dqcoeff[ci] = dqc_low;
      levels[get_padded_idx(ci, bhl)] = AOMMIN(abs_qc_low, INT8_MAX);
    }
    // Record this position if it is still non-zero after the decision.
    if (qcoeff[ci]) {
      nz_ci[*nz_num] = ci;
      ++*nz_num;
    }
  }
}

// Compares coding the block as non-skip (accumulated rate plus non_skip_cost,
// accumulated distortion) against signalling it as skipped (skip_cost only,
// distortion 0). The accumulated distortion is a delta against zeroed
// coefficients, so the all-zero block corresponds to 0.
//
// If skipping is cheaper, the coefficients listed in nz_ci[0 .. nz_num - 1]
// are zeroed in qcoeff/dqcoeff, *accu_rate is reset to 0 and *eob is set to 0.
// Otherwise nothing is modified.
static inline void update_skip(int *accu_rate, int64_t accu_dist, int *eob,
                               int nz_num, int *nz_ci, int64_t rdmult,
                               int skip_cost, int non_skip_cost,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff) {
  const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist);
  const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0);
  if (rd_new_eob < rd) {
    for (int i = 0; i < nz_num; ++i) {
      const int ci = nz_ci[i];
      qcoeff[ci] = 0;
      dqcoeff[ci] = 0;
      // no need to set up levels because this is the last step
      // levels[get_padded_idx(ci, bhl)] = 0;
    }
    *accu_rate = 0;
    *eob = 0;
  }
}

// Returns the rate cost of signalling `tx_type` for the current block, looked
// up in x->mode_costs.
//
// Returns 0 when:
//   - plane > 0,
//   - get_ext_tx_types() reports at most one candidate type,
//   - the block's segment is lossless, or
//   - the selected ext_tx_set is 0.
// For intra blocks the table is additionally indexed by the intra direction,
// which is taken from the filter-intra mode when filter intra is in use and
// from mbmi->mode otherwise.
// TODO(angiebird): use this function whenever it's possible
static int get_tx_type_cost(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            int plane, TX_SIZE tx_size, TX_TYPE tx_type,
                            int reduced_tx_set_used) {
  if (plane > 0) return 0;

  const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];

  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_inter = is_inter_block(mbmi);
  if (get_ext_tx_types(tx_size, is_inter, reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->segment_id]) {
    const int ext_tx_set =
        get_ext_tx_set(tx_size, is_inter, reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        return x->mode_costs
            .inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
    } else {
      if (ext_tx_set > 0) {
        PREDICTION_MODE intra_dir;
        if (mbmi->filter_intra_mode_info.use_filter_intra)
          intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
                                             .filter_intra_mode];
        else
          intra_dir = mbmi->mode;
        return x->mode_costs.intra_tx_type_costs[ext_tx_set][square_tx_size]
                                                [intra_dir][tx_type];
      }
    }
  }
  return 0;
}

// Rate-distortion optimizes the quantized coefficients of one transform block
// in place, walking the scan order backwards from the last coefficient.
//
// Must not be called with eob == 0 (asserted).
//
// Stages, in order:
//   1. Last coefficient (si = eob - 1): update_coeff_general() if its
//      magnitude is >= 2, otherwise its rate/distortion are accumulated
//      without modification.
//   2. update_coeff_eob() for following positions while at most max_nz_num
//      non-zero coefficients have been recorded; this stage may shorten eob.
//   3. If stage 2 consumed every position (si == -1) and sharpness == 0,
//      update_skip() may zero the whole block.
//   4. update_coeff_simple() for the remaining positions down to si == 1.
//   5. update_coeff_general() for the DC position if it is still pending
//      (si == 0).
//
// Outputs:
//   - qcoeff/dqcoeff for `block` in x->plane[plane] are modified in place.
//   - p->eobs[block] and p->txb_entropy_ctx[block] are updated.
//   - *rate_cost receives the accumulated rate, plus skip_cost when the
//     resulting eob is 0, or non_skip_cost and the tx_type cost otherwise.
// Returns the resulting eob.
int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
                     int block, TX_SIZE tx_size, TX_TYPE tx_type,
                     const TXB_CTX *const txb_ctx, int *rate_cost,
                     int sharpness) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *p = &x->plane[plane];
  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
  const int16_t *scan = scan_order->scan;
  const int shift = av1_get_tx_scale(tx_size);
  int eob = p->eobs[block];
  const int16_t *dequant = p->dequant_QTX;
  const qm_val_t *iqmatrix =
      av1_get_iqmatrix(&cpi->common.quant_params, xd, plane, tx_size, tx_type);
  // The forward quantization matrix is only passed to the distortion helper
  // when the QM-PSNR distortion metric is selected; otherwise NULL is passed.
  const qm_val_t *qmatrix =
      cpi->oxcf.tune_cfg.dist_metric == AOM_DIST_METRIC_QM_PSNR
          ? av1_get_qmatrix(&cpi->common.quant_params, xd, plane, tx_size,
                            tx_type)
          : NULL;
  const int block_offset = BLOCK_OFFSET(block);
  tran_low_t *qcoeff = p->qcoeff + block_offset;
  tran_low_t *dqcoeff = p->dqcoeff + block_offset;
  const tran_low_t *tcoeff = p->coeff + block_offset;
  const CoeffCosts *coeff_costs = &x->coeff_costs;

  // This function is not called if eob = 0.
  assert(eob > 0);

  const AV1_COMMON *cm = &cpi->common;
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  const TX_CLASS tx_class = tx_type_to_class[tx_type];
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int bhl = get_txb_bhl(tx_size);
  const int width = get_txb_wide(tx_size);
  const int height = get_txb_high(tx_size);
  assert(height == (1 << bhl));
  const int is_inter = is_inter_block(mbmi);
  const LV_MAP_COEFF_COST *txb_costs =
      &coeff_costs->coeff_costs[txs_ctx][plane_type];
  const int eob_multi_size = txsize_log2_minus4[tx_size];
  const LV_MAP_EOB_COST *txb_eob_costs =
      &coeff_costs->eob_costs[eob_multi_size][plane_type];

  // For the IQ and SSIMULACRA 2 tunings, increase rshift from 5 to 7.
  // This biases trellis quantization towards keeping more coefficients, and
  // together with the IQ and SSIMULACRA2 rdmult adjustment in
  // av1_compute_rd_mult_based_on_qindex(), this helps preserve image
  // features (like repeating patterns and camera noise/film grain), which
  // improves SSIMULACRA 2 scores.
  const int rshift = (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
                      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2)
                         ? 7
                         : 5;

  // Local RD multiplier: the block rdmult scaled by (8 - sharpness) and by the
  // per-plane multiplier (itself scaled for bit depths above 8), then reduced
  // by rshift with rounding.
  const int64_t rdmult = ROUND_POWER_OF_TWO(
      (int64_t)x->rdmult * (8 - sharpness) *
          (plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))),
      rshift);

  uint8_t levels_buf[TX_PAD_2D];
  uint8_t *const levels = set_levels(levels_buf, height);

  // With a single coefficient none of the level-dependent stages below run,
  // so the levels buffer is only initialized for eob > 1.
  if (eob > 1) av1_txb_init_levels(qcoeff, width, height, levels);

  // TODO(angiebird): check iqmatrix

  const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
  const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
  const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
  int accu_rate = eob_cost;
  int64_t accu_dist = 0;
  int si = eob - 1;
  const int ci = scan[si];
  const tran_low_t qc = qcoeff[ci];
  const tran_low_t abs_qc = abs(qc);
  const int sign = qc < 0;
  // update_coeff_eob() is run only while nz_num <= max_nz_num. nz_ci has one
  // extra slot because the helper appends before the loop condition is
  // re-checked.
  const int max_nz_num = 2;
  int nz_num = 1;
  int nz_ci[3] = { ci, 0, 0 };
  if (abs_qc >= 2) {
    update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class,
                         bhl, width, rdmult, shift, txb_ctx->dc_sign_ctx,
                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
                         levels, iqmatrix, qmatrix);
    --si;
  } else {
    // A last coefficient of magnitude 1 is kept as is; only its rate and
    // distortion delta are accumulated.
    assert(abs_qc == 1);
    const int coeff_ctx = get_lower_levels_ctx_eob(bhl, width, si);
    accu_rate +=
        get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, txb_ctx->dc_sign_ctx,
                           txb_costs, bhl, tx_class);
    const tran_low_t tqc = tcoeff[ci];
    const tran_low_t dqc = dqcoeff[ci];
    const int64_t dist = get_coeff_dist(tqc, dqc, shift, qmatrix, ci);
    const int64_t dist0 = get_coeff_dist(tqc, 0, shift, qmatrix, ci);
    accu_dist += dist - dist0;
    --si;
  }

// The switch passes tx_class to the force-inlined helper as a literal in each
// case rather than as the runtime variable.
#define UPDATE_COEFF_EOB_CASE(tx_class_literal)                            \
  case tx_class_literal:                                                   \
    for (; si >= 0 && nz_num <= max_nz_num; --si) {                        \
      update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si,   \
                       tx_size, tx_class_literal, bhl, width,              \
                       txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \
                       txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff,  \
                       levels, sharpness, iqmatrix, qmatrix);              \
    }                                                                      \
    break
  switch (tx_class) {
    UPDATE_COEFF_EOB_CASE(TX_CLASS_2D);
    UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ);
    UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_EOB_CASE
    default: assert(false);
  }

  // Every position was handled by the eob stage with few non-zero
  // coefficients left: consider dropping the block entirely.
  if (si == -1 && nz_num <= max_nz_num && sharpness == 0) {
    update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost,
                non_skip_cost, qcoeff, dqcoeff);
  }

#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                             \
  case tx_class_literal:                                                       \
    for (; si >= 1; --si) {                                                    \
      update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bhl, \
                          rdmult, shift, dequant, scan, txb_costs, tcoeff,     \
                          qcoeff, dqcoeff, levels, sharpness, iqmatrix,        \
                          qmatrix);                                            \
    }                                                                          \
    break
  switch (tx_class) {
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D);
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ);
    UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT);
#undef UPDATE_COEFF_SIMPLE_CASE
    default: assert(false);
  }

  // DC position
  if (si == 0) {
    // no need to update accu_dist because it's not used after this point
    int64_t dummy_dist = 0;
    update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class,
                         bhl, width, rdmult, shift, txb_ctx->dc_sign_ctx,
                         dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff,
                         levels, iqmatrix, qmatrix);
  }

  const int tx_type_cost = get_tx_type_cost(x, xd, plane, tx_size, tx_type,
                                            cm->features.reduced_tx_set_used);
  if (eob == 0)
    accu_rate += skip_cost;
  else
    accu_rate += non_skip_cost + tx_type_cost;

  p->eobs[block] = eob;
  p->txb_entropy_ctx[block] =
      av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]);

  *rate_cost = accu_rate;
  return eob;
}

// Computes the rate cost of a transform block with eob > 0 from the cost
// tables: non-skip flag + tx_type + eob position + every coefficient from
// scan index eob - 1 down to 0.
//
// The walk is split in three parts because they use different tables:
//   - the coefficient at eob - 1 uses base_eob_cost (indexed by level - 1),
//   - scan indices eob - 2 .. 1 use base_cost and a 1-bit literal for the sign,
//   - scan index 0 (DC) uses base_cost and dc_sign_cost for the sign.
// Levels above NUM_BASE_LEVELS additionally pay get_br_cost().
//
// NOTE(review): AOMSIGN() is defined elsewhere; the (v ^ sign) - sign idiom
// below yields |v| if it returns 0 for non-negative and -1 for negative
// values -- confirm.
static AOM_FORCE_INLINE int warehouse_efficients_txb(
    const MACROBLOCK *x, const int plane, const int block,
    const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
    const struct macroblock_plane *p, const int eob,
    const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
    const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_CLASS tx_class,
    int reduced_tx_set_used) {
  const tran_low_t *const qcoeff = p->qcoeff + BLOCK_OFFSET(block);
  const int txb_skip_ctx = txb_ctx->txb_skip_ctx;
  const int bhl = get_txb_bhl(tx_size);
  const int width = get_txb_wide(tx_size);
  const int height = get_txb_high(tx_size);
  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
  const int16_t *const scan = scan_order->scan;
  uint8_t levels_buf[TX_PAD_2D];
  uint8_t *const levels = set_levels(levels_buf, height);
  DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
  const int eob_multi_size = txsize_log2_minus4[tx_size];
  const LV_MAP_EOB_COST *const eob_costs =
      &x->coeff_costs.eob_costs[eob_multi_size][plane_type];
  int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];

  av1_txb_init_levels(qcoeff, width, height, levels);

  cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used);

  cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);

  av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts);

  const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
      coeff_costs->lps_cost;
  int c = eob - 1;
  // Coefficient at the eob position.
  {
    const int pos = scan[c];
    const tran_low_t v = qcoeff[pos];
    const int sign = AOMSIGN(v);
    const int level = (v ^ sign) - sign;
    const int coeff_ctx = coeff_contexts[pos];
    cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];

    if (v) {
      // sign bit cost
      if (level > NUM_BASE_LEVELS) {
        const int ctx = get_br_ctx_eob(pos, bhl, tx_class);
        cost += get_br_cost(level, lps_cost[ctx]);
      }
      if (c) {
        cost += av1_cost_literal(1);
      } else {
        // eob == 1: the only coefficient is DC, so the walk ends here.
        // (sign ^ sign) is always 0, so sign01 evaluates to -sign.
        const int sign01 = (sign ^ sign) - sign;
        const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
        cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
        return cost;
      }
    }
  }
  // Interior coefficients.
  const int(*base_cost)[8] = coeff_costs->base_cost;
  for (c = eob - 2; c >= 1; --c) {
    const int pos = scan[c];
    const int coeff_ctx = coeff_contexts[pos];
    const tran_low_t v = qcoeff[pos];
    const int level = abs(v);
    cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
    if (v) {
      // sign bit cost
      cost += av1_cost_literal(1);
      if (level > NUM_BASE_LEVELS) {
        const int ctx = get_br_ctx(levels, pos, bhl, tx_class);
        cost += get_br_cost(level, lps_cost[ctx]);
      }
    }
  }
  // c == 0 after previous loop
  {
    const int pos = scan[c];
    const tran_low_t v = qcoeff[pos];
    const int coeff_ctx = coeff_contexts[pos];
    const int sign = AOMSIGN(v);
    const int level = (v ^ sign) - sign;
    cost += base_cost[coeff_ctx][AOMMIN(level, 3)];

    if (v) {
      // sign bit cost
      // (sign ^ sign) is always 0, so sign01 evaluates to -sign.
      const int sign01 = (sign ^ sign) - sign;
      const int dc_sign_ctx = txb_ctx->dc_sign_ctx;
      cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
      if (level > NUM_BASE_LEVELS) {
        const int ctx = get_br_ctx(levels, pos, bhl, tx_class);
        cost += get_br_cost(level, lps_cost[ctx]);
      }
    }
  }
  return cost;
}

/*!\brief Estimate the entropy cost of transform coefficients using Laplacian
 * distribution.
 *
 * \ingroup coefficient_coding
 *
 * This function assumes each transform coefficient is of its own Laplacian
 * distribution and the coefficient is the only observation of the Laplacian
 * distribution.
 *
 * Based on that, each coefficient's coding cost can be estimated by computing
 * the entropy of the corresponding Laplacian distribution.
 *
 * This function then returns the sum of the estimated entropy cost for all
 * coefficients in the transform block.
 *
 * Note that the entropy cost of end of block (eob) and transform type (tx_type)
 * are not included.
 *
 * The function reads the eob from x->plane[plane].eobs[block] and indexes the
 * scan with eob - 1, so it must only be called for blocks with eob > 0. Only
 * plane 0 is supported (asserted).
 *
 * \param[in]    x              Pointer to structure holding the data for the
                                current encoding macroblock
 * \param[in]    plane          The index of the current plane
 * \param[in]    block          The index of the current transform block in the
 * macroblock. It's defined by number of 4x4 units that have been coded before
 * the current transform block
 * \param[in]    tx_size        The transform size
 * \param[in]    tx_type        The transform type
 * \return       int            Estimated entropy cost of coefficients in the
 * transform block.
 */
static int av1_cost_coeffs_txb_estimate(const MACROBLOCK *x, const int plane,
                                        const int block, const TX_SIZE tx_size,
                                        const TX_TYPE tx_type) {
  assert(plane == 0);

  int cost = 0;
  const struct macroblock_plane *p = &x->plane[plane];
  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
  const int16_t *scan = scan_order->scan;
  tran_low_t *qcoeff = p->qcoeff + BLOCK_OFFSET(block);

  int eob = p->eobs[block];

  // coeffs
  int c = eob - 1;
  // eob
  {
    // The coefficient at the eob position is costed linearly in
    // (magnitude - 1) instead of through costLUT.
    const int pos = scan[c];
    const tran_low_t v = abs(qcoeff[pos]) - 1;
    cost += (v << (AV1_PROB_COST_SHIFT + 2));
  }
  // other coeffs
  for (c = eob - 2; c >= 0; c--) {
    const int pos = scan[c];
    const tran_low_t v = abs(qcoeff[pos]);
    // Magnitudes of 14 and above share the last costLUT entry.
    const int idx = AOMMIN(v, 14);

    cost += costLUT[idx];
  }

  // const_term does not contain DC, and log(e) does not contain eob, so both
  // (eob-1)
  cost += (const_term + loge_par) * (eob - 1);

  return cost;
}

// Laplacian-estimate counterpart of warehouse_efficients_txb(): the non-skip
// flag, tx_type and eob position are costed from the tables exactly as there,
// but the per-coefficient cost comes from av1_cost_coeffs_txb_estimate().
// Intended for eob > 0; the caller handles eob == 0 separately.
static AOM_FORCE_INLINE int warehouse_efficients_txb_laplacian(
    const MACROBLOCK *x, const int plane, const int block,
    const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, const int eob,
    const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs,
    const MACROBLOCKD *const xd, const TX_TYPE tx_type, const TX_CLASS tx_class,
    int reduced_tx_set_used) {
  const int txb_skip_ctx = txb_ctx->txb_skip_ctx;

  const int eob_multi_size = txsize_log2_minus4[tx_size];
  const LV_MAP_EOB_COST *const eob_costs =
      &x->coeff_costs.eob_costs[eob_multi_size][plane_type];
  int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];

  cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used);

  cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);

  cost += av1_cost_coeffs_txb_estimate(x, plane, block, tx_size, tx_type);
  return cost;
}

// Returns the rate cost of the transform block `block` of `plane`, based on
// the eob stored in x->plane[plane].eobs[block].
//
// For eob == 0 this is just the cost of signalling the skip flag
// (txb_skip_cost[ctx][1]); otherwise the full table-based cost from
// warehouse_efficients_txb() is returned. Nothing in `x` is modified.
int av1_cost_coeffs_txb(const MACROBLOCK *x, const int plane, const int block,
                        const TX_SIZE tx_size, const TX_TYPE tx_type,
                        const TXB_CTX *const txb_ctx, int reduced_tx_set_used) {
  const struct macroblock_plane *p = &x->plane[plane];
  const int eob = p->eobs[block];
  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const LV_MAP_COEFF_COST *const coeff_costs =
      &x->coeff_costs.coeff_costs[txs_ctx][plane_type];
  if (eob == 0) {
    return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
  }

  const MACROBLOCKD *const xd = &x->e_mbd;
  const TX_CLASS tx_class = tx_type_to_class[tx_type];

  return warehouse_efficients_txb(x, plane, block, tx_size, txb_ctx, p, eob,
                                  plane_type, coeff_costs, xd, tx_type,
                                  tx_class, reduced_tx_set_used);
}

// Same contract as av1_cost_coeffs_txb(), but the per-coefficient cost is the
// Laplacian estimate (see av1_cost_coeffs_txb_estimate()).
//
// When `adjust_eob` is non-zero, update_coeff_eob_fast() is run first and its
// resulting eob is written back to p->eobs[block]; the coefficient buffers are
// passed to it as non-const pointers, so despite `x` being const the data it
// points to may be modified.
// NOTE(review): the exact effect of update_coeff_eob_fast() on
// qcoeff/dqcoeff is defined elsewhere -- confirm.
int av1_cost_coeffs_txb_laplacian(const MACROBLOCK *x, const int plane,
                                  const int block, const TX_SIZE tx_size,
                                  const TX_TYPE tx_type,
                                  const TXB_CTX *const txb_ctx,
                                  const int reduced_tx_set_used,
                                  const int adjust_eob) {
  const struct macroblock_plane *p = &x->plane[plane];
  int eob = p->eobs[block];

  if (adjust_eob) {
    const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type);
    const int16_t *scan = scan_order->scan;
    tran_low_t *tcoeff = p->coeff + BLOCK_OFFSET(block);
    tran_low_t *qcoeff = p->qcoeff + BLOCK_OFFSET(block);
    tran_low_t *dqcoeff = p->dqcoeff + BLOCK_OFFSET(block);
    update_coeff_eob_fast(&eob, av1_get_tx_scale(tx_size), p->dequant_QTX, scan,
                          tcoeff, qcoeff, dqcoeff);
    p->eobs[block] = eob;
  }

  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const LV_MAP_COEFF_COST *const coeff_costs =
      &x->coeff_costs.coeff_costs[txs_ctx][plane_type];
  if (eob == 0) {
    return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
  }

  const MACROBLOCKD *const xd = &x->e_mbd;
  const TX_CLASS tx_class = tx_type_to_class[tx_type];

  return warehouse_efficients_txb_laplacian(
      x, plane, block, tx_size, txb_ctx, eob, plane_type, coeff_costs, xd,
      tx_type, tx_class, reduced_tx_set_used);
}