rd.c (63164B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <limits.h> 14 #include <math.h> 15 #include <stdio.h> 16 17 #include "aom_dsp/aom_dsp_common.h" 18 #include "aom_mem/aom_mem.h" 19 #include "aom_ports/bitops.h" 20 #include "aom_ports/mem.h" 21 #include "aom_ports/aom_once.h" 22 23 #include "av1/common/common.h" 24 #include "av1/common/entropy.h" 25 #include "av1/common/entropymode.h" 26 #include "av1/common/pred_common.h" 27 #include "av1/common/quant_common.h" 28 #include "av1/common/reconinter.h" 29 #include "av1/common/reconintra.h" 30 #include "av1/common/seg_common.h" 31 32 #include "av1/encoder/cost.h" 33 #include "av1/encoder/encodemv.h" 34 #include "av1/encoder/encoder.h" 35 #include "av1/encoder/nonrd_opt.h" 36 #include "av1/encoder/ratectrl.h" 37 #include "av1/encoder/rd.h" 38 #include "config/aom_config.h" 39 40 #define RD_THRESH_POW 1.25 41 42 // The baseline rd thresholds for breaking out of the rd loop for 43 // certain modes are assumed to be based on 8x8 blocks. 44 // This table is used to correct for block size. 45 // The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
};

// Per-txsize enable masks for the intra extended-transform sets. Row 0
// (DCT-only) is never indexed below: the fill loops start at set index 1.
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                            };

// Per-txsize enable masks for the inter extended-transform sets (row 0
// unused, as above).
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                              { 0, 1, 1, 1 },
                                            };

// Maps an extended-transform set index to its set type; [0] indexes the
// intra sets, [1] the inter sets.
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
                                                      EXT_TX_SETS_INTER)] = {
  {
      // Intra
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_DTT4_IDTX_1DDCT,
      EXT_TX_SET_DTT4_IDTX,
  },
  {
      // Inter
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_ALL16,
      EXT_TX_SET_DTT9_IDTX_1DDCT,
      EXT_TX_SET_DCT_IDTX,
  },
};

// Converts the frame's current entropy-coding CDFs (fc) into bit-cost tables
// (mode_costs) for every mode-related syntax element used during RD mode
// decision. Costs for inter-only syntax are filled only when the frame is not
// intra-only; skip-mode costs only when skip mode is enabled for the frame.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  int i, j;

  for (i = 0; i < PARTITION_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
                             fc->partition_cdf[i], NULL);

  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
                               fc->skip_mode_cdfs[i], NULL);
    }
  }

  for (i = 0; i < SKIP_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
                             fc->skip_txfm_cdfs[i], NULL);
  }

  // Intra luma mode costs: keyframe costs are context dependent (above/left
  // mode contexts); non-keyframe luma mode costs depend on block size group.
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
                               fc->kf_y_cdf[i][j], NULL);

  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
                             NULL);
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
    for (j = 0; j < INTRA_MODES; ++j)
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
                               fc->uv_mode_cdf[i][j], NULL);

  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    if (av1_filter_intra_allowed_bsize(cm, i))
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
                               fc->filter_intra_cdfs[i], NULL);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
                             fc->switchable_interp_cdf[i], NULL);

  // Palette costs: size and per-plane mode/color-index costs.
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
                             fc->palette_y_size_cdf[i], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
                             fc->palette_uv_size_cdf[i], NULL);
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
                               fc->palette_y_mode_cdf[i][j], NULL);
    }
  }

  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
                             fc->palette_uv_mode_cdf[i], NULL);
  }

  for (i = 0; i < PALETTE_SIZES; ++i) {
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
                               fc->palette_y_color_index_cdf[i][j], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
                               fc->palette_uv_color_index_cdf[i][j], NULL);
    }
  }

  // CfL alpha costs. A plane whose sign is CFL_SIGN_ZERO codes no alpha, so
  // its alphabet costs are zero. The joint-sign cost is folded into the U
  // cost only, so adding cost_u + cost_v counts the sign exactly once.
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    } else {
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    }
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    } else {
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    }
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
      cost_u[u] += sign_cost[joint_sign];
  }

  for (i = 0; i < MAX_TX_CATS; ++i)
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
                               fc->tx_size_cdf[i][j], NULL);

  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
                             fc->txfm_partition_cdf[i], NULL);
  }

  // Extended-transform type costs; set index 0 (DCT-only) is skipped and the
  // inverse mapping reorders tokens into TX_TYPE order.
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
    int s;
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
      if (use_inter_ext_tx_for_txsize[s][i]) {
        av1_cost_tokens_from_cdf(
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
      }
    }
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
      if (use_intra_ext_tx_for_txsize[s][i]) {
        for (j = 0; j < INTRA_MODES; ++j) {
          av1_cost_tokens_from_cdf(
              mode_costs->intra_tx_type_costs[s][i][j],
              fc->intra_ext_tx_cdf[s][i][j],
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
        }
      }
    }
  }
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
                             fc->angle_delta_cdf[i], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
                             fc->seg.spatial_pred_seg_cdf[i], NULL);
  }

  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
                             NULL);
  }

  // Inter-frame-only syntax costs (reference selection, inter modes,
  // compound/interintra tools, motion modes).
  if (!frame_is_intra_only(cm)) {
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
                               fc->comp_inter_cdf[i], NULL);
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
                                 fc->single_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
                               fc->comp_ref_type_cdf[i], NULL);
    }

    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
                                 fc->uni_comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < FWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
                                 fc->comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < BWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
                                 fc->comp_bwdref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
                               fc->intra_inter_cdf[i], NULL);
    }

    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
                               fc->zeromv_cdf[i], NULL);
    }

    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
                               NULL);
    }
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
                               fc->inter_compound_mode_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
                               fc->compound_type_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      if (av1_is_wedge_used(i)) {
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
                                 fc->wedge_idx_cdf[i], NULL);
      }
    }
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
                               fc->interintra_cdf[i], NULL);
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
                               fc->interintra_mode_cdf[i], NULL);
    }
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
                               fc->wedge_interintra_cdf[i], NULL);
    }
    // Motion modes are only signalled for blocks >= 8x8.
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
                               fc->motion_mode_cdf[i], NULL);
    }
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
                               fc->obmc_cdf[i], NULL);
    }
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
                               fc->compound_index_cdf[i], NULL);
    }
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
                               fc->comp_group_idx_cdf[i], NULL);
    }
  }
}

#if !CONFIG_REALTIME_ONLY
// Fills bit costs for the loop-restoration filter-type syntax from the
// frame's CDFs.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
#endif  // !CONFIG_REALTIME_ONLY

// Values are now correlated to quantizer.
// SAD-per-bit lookup tables, one per supported bit depth, indexed by qindex.
// Filled once at init via av1_init_me_luts().
static int sad_per_bit_lut_8[QINDEX_RANGE];
static int sad_per_bit_lut_10[QINDEX_RANGE];
static int sad_per_bit_lut_12[QINDEX_RANGE];

static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  int i;
  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < range; i++) {
    const double q = av1_convert_qindex_to_q(i, bit_depth);
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
  }
}

static void init_me_luts(void) {
  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
}

// Thread-safe one-time initialization of the SAD LUTs.
void av1_init_me_luts(void) { aom_once(init_me_luts); }

// ARF-boost based rdmult scaling factors, in 1/128 units, indexed by
// boost_index (0..15).
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Pyramid-layer-depth based rdmult scaling factors, in 1/128 units.
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224
};

// Returns the default rd multiplier for inter frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_inter_rd_multiplier(int qindex) {
  return 3.2 + (0.0015 * (double)qindex);
}

// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_arf_rd_multiplier(int qindex) {
  return 3.25 + (0.0015 * (double)qindex);
}

// Returns the default rd multiplier for key frames for a given qindex.
// The function here is a first pass estimate based on data from
// a previous Vizer run
static double def_kf_rd_multiplier(int qindex) {
  return 3.3 + (0.0015 * (double)qindex);
}

// Computes the base rd multiplier (lambda) from the qindex: q^2 scaled by a
// frame-type dependent multiplier, optionally boosted for IQ/SSIMULACRA2
// tuning, then normalized for bit depth. Returns at least 1; -1 on an
// invalid bit depth.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex, aom_tune_metric tuning) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  int64_t rdmult = q * q;
  if (update_type == KF_UPDATE) {
    double def_rd_q_mult = def_kf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    double def_rd_q_mult = def_arf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  }

  if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
    // quality. The most noticeable effect is a mild bias towards choosing
    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
    // transforms).
    // For very high qindexes, start progressively reducing the weight towards
    // unity (128/128), as transforms are large enough and making them even
    // larger actually harms subjective quality and SSIMULACRA 2 scores.
    // This weight part of the equation was determined by iteratively
    // increasing weight on CID22 and Daala's subset1, and observing its
    // effects on visual quality and SSIMULACRA 2 scores along the usable
    // (0-100) range.
    // The ramp-down part of the equation was determined by choosing a fixed
    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where
    // SSIMULACRA 2 scores for encodes with qindexes greater than 159 scored
    // at or above their equivalents with no rdmult adjustment.
    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
    rdmult = (int64_t)((double)rdmult * weight / 128.0);
  }

  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}

// Computes the final rd multiplier for a frame: the qindex-based base value,
// further adjusted (two-pass, non-fixed-offset, non-keyframe only) by the
// pyramid layer depth and ARF boost factors. Always returns >= 1.
int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
                        const FRAME_UPDATE_TYPE update_type,
                        const int layer_depth, const int boost_index,
                        const FRAME_TYPE frame_type,
                        const int use_fixed_qp_offsets,
                        const int is_stat_consumption_stage,
                        const aom_tune_metric tuning) {
  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
                                                       qindex, tuning);
  if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
      (frame_type != KEY_FRAME)) {
    // Layer depth adjustment
    rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
    // ARF boost adjustment
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}

// Returns the signed qindex offset that scales the quantizer step by
// approximately 1/sqrt(beta): walks qindex down (beta > 1) or up (beta < 1)
// until the DC quant step crosses the target.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  int newq = (int)rint(q / sqrt(beta));
  int orig_qindex = qindex;
  if (newq == q) {
    return 0;
  }
  if (newq < q) {
    while (qindex > 0) {
      qindex--;
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
      if (newq >= q) {
        break;
      }
    }
  } else {
    while (qindex < MAXQ) {
      qindex++;
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
      if (newq <= q) {
        break;
      }
    }
  }
  return qindex - orig_qindex;
}

// Snaps a requested qindex to the delta-q resolution grid around
// prev_qindex, applying a deadzone of delta_q_res/4 and keeping the result
// above MINQ.
int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
                                  int curr_qindex) {
  curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
  const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
  const int deltaq_deadzone = delta_q_res / 4;
  const int qmask = ~(delta_q_res - 1);
  int abs_deltaq_index = abs(curr_qindex - prev_qindex);
  abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
  int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
  adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
  return adjust_qindex;
}

#if !CONFIG_REALTIME_ONLY
// Returns the frame rd multiplier divided by beta, for per-block adaptive
// lambda scaling.
int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
  assert(beta > 0.0);
  const AV1_COMMON *cm = &cpi->common;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  const int qindex_rdmult = cm->quant_params.base_qindex;
  return (int)(av1_compute_rd_mult(
                   qindex_rdmult, cm->seq_params->bit_depth,
                   cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
                   layer_depth, boost_index, frame_type,
                   cpi->oxcf.q_cfg.use_fixed_qp_offsets,
                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
               beta);
}
#endif  // !CONFIG_REALTIME_ONLY

// Returns the qindex-dependent factor used to scale the per-mode rd
// thresholds; the DC quant step is normalized to 8-bit range first.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double q;
  switch (bit_depth) {
    case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
    case AOM_BITS_10:
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
      break;
    case AOM_BITS_12:
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
      break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  // TODO(debargha): Adjust the function below.
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}

// Looks up the SAD-per-bit value for the given qindex from the bit-depth
// specific LUT.
void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
  switch (cpi->common.seq_params->bit_depth) {
    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  }
}

// Fills rd->threshes for every segment/block-size/mode combination by scaling
// rd->thresh_mult with a qindex- and block-size-dependent factor. In nonrd
// (real-time) mode only the RTC mode subset is populated.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  int i, bsize, segment_id;
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Gather the THR_MODES indices of the RTC inter/intra mode subset.
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        for (i = 0; i < RTC_INTER_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[i])];
      } else {
        for (i = 0; i < RTC_INTRA_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[i])];
      }
    }
  }

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < num_modes_count; ++i) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
        rd->threshes[segment_id][bsize][mode_index] =
            rd->thresh_mult[mode_index] < thresh_max
                ? rd->thresh_mult[mode_index] * t / 4
                : INT_MAX;
      }
    }
  }
}

// Converts the frame's coefficient-coding CDFs into bit-cost tables: EOB
// position costs per transform area, then per-txsize/plane txb-skip, base
// level, base-range and DC-sign costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *pcdf;
        // Select the EOB CDF matching the transform area (16..1024).
        switch (eob_multi_size) {
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
      }
    }
  }
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

      // Entries 4..7 hold precomputed deltas used by the fast coefficient
      // cost estimation path.
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);

      // Base-range (level > 3) costs: cumulative cost of repeated BR symbol
      // groups across COEFF_BASE_RANGE levels.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        int prev_cost = 0;
        int i, j;
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);
        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
          }
          prev_cost += br_rate[j];
        }
        pcost->lps_cost[ctx][i] = prev_cost;
      }
      // Entries above COEFF_BASE_RANGE hold the deltas between consecutive
      // level costs for the fast estimation path.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
        }
      }
    }
  }
}

// Rebuilds the motion-vector cost tables from the given nmv context. The
// cost arrays are offset by MV_MAX so they can be indexed with signed
// components; precision follows integer_mv/usehp.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
  } else {
    mv_costs->mv_cost_stack =
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, usehp);
  }
}

// Rebuilds the intra-block-copy displacement-vector cost tables (always
// full-pel precision).
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
  dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}

// Populates speed features based on codec control settings (of type
// COST_UPDATE_TYPE) and expected speed feature settings (of type
// INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
// The populated/updated speed features are used for cost updates in the
// encoder.
// WARNING: Population of unified cost update frequency needs to be taken care
// accordingly, in case of any modifications/additions to the enum
// COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
static inline void populate_unified_cost_update_freq(
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  // Mapping of entropy cost update frequency from the encoder's codec control
  // settings of type COST_UPDATE_TYPE to speed features of type
  // INTERNAL_COST_UPDATE_TYPE.
  static const INTERNAL_COST_UPDATE_TYPE
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
        INTERNAL_COST_UPD_OFF
      };

  inter_sf->mv_cost_upd_level =
      AOMMIN(inter_sf->mv_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
  inter_sf->coeff_cost_upd_level =
      AOMMIN(inter_sf->coeff_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
  inter_sf->mode_cost_upd_level =
      AOMMIN(inter_sf->mode_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
  sf->intra_sf.dv_cost_upd_level =
      AOMMIN(sf->intra_sf.dv_cost_upd_level,
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
}

// Checks if entropy costs should be initialized/updated at frame level or not.
static inline int is_frame_level_cost_upd_freq_set(
    const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
    const int use_nonrd_pick_mode, const int frames_since_key) {
  const int fill_costs =
      frame_is_intra_only(cm) ||
      (use_nonrd_pick_mode ? frames_since_key < 2
                           : (cm->current_frame.frame_number & 0x07) == 1);
  return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
          cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
}

// Decide whether we want to update the mode entropy cost for the current
// frame. The logic is currently inherited from selective_disable_cdf_rtc.
748 static inline int should_force_mode_cost_update(const AV1_COMP *cpi) { 749 const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf; 750 if (!rt_sf->frame_level_mode_cost_update) { 751 return false; 752 } 753 754 if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) { 755 return cpi->frames_since_last_update == 1; 756 } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) { 757 if (cpi->svc.number_spatial_layers == 1 && 758 cpi->svc.number_temporal_layers == 1) { 759 const AV1_COMMON *const cm = &cpi->common; 760 const RATE_CONTROL *const rc = &cpi->rc; 761 762 return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) || 763 rc->high_source_sad || rc->frames_since_key < 10 || 764 cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 || 765 cm->current_frame.frame_number % 8 == 0; 766 } else if (cpi->svc.number_temporal_layers > 1) { 767 return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1; 768 } 769 } 770 771 return false; 772 } 773 774 void av1_initialize_rd_consts(AV1_COMP *cpi) { 775 AV1_COMMON *const cm = &cpi->common; 776 MACROBLOCK *const x = &cpi->td.mb; 777 SPEED_FEATURES *const sf = &cpi->sf; 778 RD_OPT *const rd = &cpi->rd; 779 int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode; 780 int frames_since_key = cpi->rc.frames_since_key; 781 782 const GF_GROUP *const gf_group = &cpi->ppi->gf_group; 783 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); 784 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); 785 const FRAME_TYPE frame_type = cm->current_frame.frame_type; 786 787 const int qindex_rdmult = 788 cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q; 789 rd->RDMULT = av1_compute_rd_mult( 790 qindex_rdmult, cm->seq_params->bit_depth, 791 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, 792 boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, 793 is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); 794 #if CONFIG_RD_COMMAND 795 if 
(cpi->oxcf.pass == 2) {
    // CONFIG_RD_COMMAND debug/tuning path: an externally supplied per-frame
    // command list may override this frame's RDMULT directly.
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  // Derive the motion-search error-per-bit from the final RDMULT.
  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);

  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);

  // Frame level dv cost update
  if (av1_need_dv_costs(cpi)) {
    // Lazily allocate the IntraBC DV cost table the first time it is needed;
    // the allocation is reused for later frames.
    if (cpi->td.dv_costs_alloc == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.dv_costs_alloc,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
      cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}

// Looks up the normalized rate (*r_q10) and distortion (*d_q10) of a
// Laplacian source for a given xsq_q10 (Q10 ratio of squared quantizer step
// to variance), by linear interpolation between precomputed table samples.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Sample positions (Q10) at which the two tables above are tabulated;
  // segment width doubles every 8 entries.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };
  // Locate the table segment containing xsq_q10, then linearly interpolate
  // between the two bracketing samples with Q10 weights a_q10/b_q10.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize.
// The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    // Zero variance: nothing to code, no distortion.
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // Last tabulated sample position in model_rd_norm().
    static const uint32_t MAX_XSQ_Q10 = 245727;
    // xsq_q10 = qstep^2 * 2^n_log2 / var, rounded, in Q10.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by the sample count (2^n_log2) and convert from
    // Q10 to the AV1 probability-cost domain.
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
    // Actual distortion is the normalized distortion times the variance.
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

// Cubic (Catmull-Rom style) interpolation through 4 control points p[0..3],
// evaluated at fractional offset x in [0, 1] between p[1] and p[2].
static double interp_cubic(const double *p, double x) {
  return p[1] + 0.5 * x *
                    (p[2] - p[0] +
                     x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
                          x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
}

/*
static double interp_bicubic(const double *p, int p_stride, double x,
                             double y) {
  double q[4];
  q[0] = interp_cubic(p, x);
  q[1] = interp_cubic(p + p_stride, x);
  q[2] = interp_cubic(p + 2 * p_stride, x);
  q[3] = interp_cubic(p + 3 * p_stride, x);
  return interp_cubic(q, y);
}
*/

// Maps each block size to one of four curve-fit model categories used to
// index interp_rgrid_curv below.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
};

// Selects the distortion curve category: 0 for low normalized SSE,
// 1 for high (threshold 16.0).
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return (sse_norm > 16.0);
}

// Fitted rate curves, one row per block-size category, sampled on a uniform
// grid of xqr values; interpolated via interp_cubic() in
// av1_model_rd_curvfit().
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};

// Fitted distortion/SSE curves, indexed by the SSE category; sampled on the
// same xqr grid as interp_rgrid_curv.
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799,
9.985848, 8.898823, 7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
// NOTE(review): interp_dgrid_curv is declared with 3 rows but only 2 are
// initialized (the third is implicitly zero). Only categories 0 and 1 are
// produced by sse_norm_curvfit_model_cat_lookup(), so row 2 appears unused
// — confirm before relying on it.

// Estimates rate (*rate_f) and distortion-per-SSE (*distbysse_f) for the
// given block size, normalized SSE and xqr value by cubic interpolation
// into the fitted curves above.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The curve grids are sampled at xqr in [-15.5, 16.5] with step 0.5.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;
  const int rcat = bsize_curvfit_model_cat_lookup[bsize];
  const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
  (void)x_end;

  // Clamp xqr one grid step inside the sampled range so the 4-point cubic
  // stencil (xi - 1 .. xi + 2) stays within the tables.
  xqr = AOMMAX(xqr, x_start + x_step + epsilon);
  xqr = AOMMIN(xqr, x_end - x_step - epsilon);
  const double x = (xqr - x_start) / x_step;
  const int xi = (int)floor(x);
  const double xo = x - xi;  // fractional position within the segment

  assert(xi > 0);

  const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
  *rate_f = interp_cubic(prate, xo);
  const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
  *distbysse_f = interp_cubic(pdist, xo);
}

// Copies the above/left entropy contexts covering one plane's block into
// the caller-provided arrays.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int num_4x4_w = mi_size_wide[plane_bsize];
  const int num_4x4_h = mi_size_high[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
  const ENTROPY_CONTEXT *const left = pd->left_entropy_context;

  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
}

// Public wrapper: validates the block size and fetches the entropy contexts
// for one plane.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}

// Special clamping used in the encoder when calculating a prediction
//
// Logically, all pixel fetches used for prediction are clamped against the
// edges of the frame. But doing this directly is slow, so instead we allocate
// a finite border around the frame and fill it with copies of the outermost
// pixels.
//
// Since this border is finite, we need to clamp the motion vector before
// prediction in order to avoid out-of-bounds reads. At the same time, this
// clamp must not change the prediction result.
//
// We can balance both of these concerns by calculating how far we would have
// to go in each direction before the extended prediction region (the current
// block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
// so that it touches the frame only at one row or column. This is a special
// point because any more extreme MV will always lead to the same prediction.
// So it is safe to clamp at that point.
//
// In the worst case, this requires a border of
//   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
// around the frame edges.
static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                                MV *mv) {
  // Block dimensions in pixels.
  int bw = xd->width << MI_SIZE_LOG2;
  int bh = xd->height << MI_SIZE_LOG2;

  // Distances (in pixels) from the block's mi position to each frame edge.
  int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
  int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
  int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
  int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;

  // Furthest subpel MV in each direction at which the extended prediction
  // region still touches the frame; beyond that the prediction is constant,
  // so clamping there is lossless (see the comment above this function).
  const SubpelMvLimits mv_limits = {
    .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
    .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
    .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
    .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
  };
  clamp_mv(mv, &mv_limits);
}

// Evaluates the SAD of up to two candidate reference MVs (from the MV
// reference stack) against the source block, recording the best SAD and the
// largest full-pel MV magnitude for this reference frame in the MACROBLOCK.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv ref_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv ref_mv1 =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
  int num_mv_refs = 0;
  pred_mv[num_mv_refs++] = ref_mv.as_mv;
  // Only consider the second candidate when it differs from the first.
  if (ref_mv.as_int != ref_mv1.as_int) {
    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
  }

  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  // Get the sad for each candidate reference mv.
  for (int i = 0; i < num_mv_refs; ++i) {
    MV *this_mv = &pred_mv[i];
    enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);

    // Round the subpel MV to full-pel (round half away from zero).
    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Skip duplicate zero MVs (their SAD would be identical).
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
    }
    // Record the per-candidate SADs separately as well.
    if (i == 0)
      x->pred_mv0_sad[ref_frame] = this_sad;
    else if (i == 1)
      x->pred_mv1_sad[ref_frame] = this_sad;
  }

  // Note the index of the mv that worked best in the reference list.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}

// Points dst[] at the planes of the given source frame and configures each
// plane's prediction buffer for the current block position and scaling.
void av1_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv,
                          const int num_planes) {
  dst[0].buf = src->y_buffer;
  dst[0].stride = src->y_stride;
  dst[1].buf = src->u_buffer;
  dst[2].buf = src->v_buffer;
  // U and V share the same stride.
  dst[1].stride = dst[2].stride = src->uv_stride;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  for (int i = 0; i < num_planes; ++i) {
    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
                     i ? src->uv_crop_width : src->y_crop_width,
                     i ? src->uv_crop_height : src->y_crop_height,
                     dst[i].stride, mi_row, mi_col, i ?
scale_uv : scale,
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  }
}

// Returns the scaled version of the reference frame if a distinct scaled
// buffer exists for it, otherwise NULL.
YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
                                             int ref_frame) {
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
  const RefCntBuffer *const ref_buf =
      get_ref_frame_buf(&cpi->common, ref_frame);
  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
                                                       : NULL;
}

// Returns the rate cost of signaling the block's interpolation filter(s);
// zero unless the frame-level filter choice is SWITCHABLE.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter == SWITCHABLE) {
    const MB_MODE_INFO *const mbmi = xd->mi[0];
    int inter_filter_cost = 0;
    // With dual_filter the two filter directions are coded independently;
    // otherwise only direction 0 is signaled.
    for (int dir = 0; dir < 2; ++dir) {
      if (dir && !dual_filter) break;
      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
      const InterpFilter filter =
          av1_extract_interp_filter(mbmi->interp_filters, dir);
      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
    }
    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
  } else {
    return 0;
  }
}

// Initializes the per-mode RD threshold multipliers used to prune modes in
// the RD mode search (a larger multiplier prunes a mode earlier).
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // Single-reference modes.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // Compound modes, grouped by reference-frame pair.
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // Intra modes.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}

// Updates the adaptive RD threshold factors for modes in
// [mode_start, mode_end) over block sizes [min_size, max_size]: the winning
// mode's factor decays (so it is tried earlier next time), while every other
// mode's factor grows, capped at max_rd_thresh_factor.
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
      int *const fact = &factor_buf[bs][mode];
      if (mode == best_mode_index) {
        // Exponential decay toward zero for the chosen mode.
        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
      } else {
        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
      }
    }
  }
}

// Updates the adaptive RD threshold factors after a mode decision for bsize,
// covering both the supplied inter and intra mode ranges.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
  BLOCK_SIZE min_size, max_size;
  if (bsize_is_1_to_4) {
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
    //
TODO(any): Experiment with threshold update for parent/child blocks 1465 min_size = bsize; 1466 max_size = bsize; 1467 } else { 1468 min_size = AOMMAX(bsize - 2, BLOCK_4X4); 1469 max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size); 1470 } 1471 1472 update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end, 1473 min_size, max_size, max_rd_thresh_factor); 1474 update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end, 1475 min_size, max_size, max_rd_thresh_factor); 1476 } 1477 1478 int av1_get_intra_cost_penalty(int qindex, int qdelta, 1479 aom_bit_depth_t bit_depth) { 1480 const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth); 1481 switch (bit_depth) { 1482 case AOM_BITS_8: return 20 * q; 1483 case AOM_BITS_10: return 5 * q; 1484 case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2); 1485 default: 1486 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); 1487 return -1; 1488 } 1489 }