picklpf.c (18392B)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <limits.h>

#include "config/aom_scale_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/psnr.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"

#include "av1/common/av1_common_int.h"
#include "av1/common/av1_loopfilter.h"
#include "av1/common/quant_common.h"

#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/picklpf.h"

// AV1 loop filter applies to the whole frame according to mi_rows and mi_cols,
// which are calculated based on aligned width and aligned height.
// In addition, if super res is enabled, it copies the whole frame
// according to the aligned width and height (av1_superres_upscale()).
// So we need to copy the whole filtered region, instead of the cropped region.
// For example, input image size is: 160x90.
// Then src->y_crop_width = 160, src->y_crop_height = 90.
// The aligned frame size is: src->y_width = 160, src->y_height = 96.
// AV1 aligns frame size to a multiple of 8, so that if there is
// chroma subsampling, the chroma is also guaranteed to be
// an integer number of mi units. mi unit is 4x4, 8 = 4 * 2, and 2 luma mi
// units correspond to 1 chroma mi unit if there is subsampling.
// See: aom_realloc_frame_buffer() in yv12config.c.
43 static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc, 44 YV12_BUFFER_CONFIG *dst_bc, int plane) { 45 switch (plane) { 46 case 0: aom_yv12_copy_y(src_bc, dst_bc, 0); break; 47 case 1: aom_yv12_copy_u(src_bc, dst_bc, 0); break; 48 case 2: aom_yv12_copy_v(src_bc, dst_bc, 0); break; 49 default: assert(plane >= 0 && plane <= 2); break; 50 } 51 } 52 53 static int get_max_filter_level(const AV1_COMP *cpi) { 54 if (is_stat_consumption_stage_twopass(cpi)) { 55 return cpi->ppi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 56 : MAX_LOOP_FILTER; 57 } else { 58 return MAX_LOOP_FILTER; 59 } 60 } 61 62 static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, 63 AV1_COMP *const cpi, int filt_level, 64 int partial_frame, int plane, int dir) { 65 MultiThreadInfo *const mt_info = &cpi->mt_info; 66 int num_workers = mt_info->num_mod_workers[MOD_LPF]; 67 AV1_COMMON *const cm = &cpi->common; 68 int64_t filt_err; 69 70 assert(plane >= 0 && plane <= 2); 71 int filter_level[2] = { filt_level, filt_level }; 72 if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1]; 73 if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0]; 74 75 // set base filters for use of get_filter_level (av1_loopfilter.c) when in 76 // DELTA_LF mode 77 switch (plane) { 78 case 0: 79 cm->lf.filter_level[0] = filter_level[0]; 80 cm->lf.filter_level[1] = filter_level[1]; 81 break; 82 case 1: cm->lf.filter_level_u = filter_level[0]; break; 83 case 2: cm->lf.filter_level_v = filter_level[0]; break; 84 } 85 86 // lpf_opt_level = 1 : Enables dual/quad loop-filtering. 
87 int lpf_opt_level = is_inter_tx_size_search_level_one(&cpi->sf.tx_sf); 88 89 av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane, 90 plane + 1, partial_frame, mt_info->workers, 91 num_workers, &mt_info->lf_row_sync, lpf_opt_level); 92 93 filt_err = aom_get_sse_plane(sd, &cm->cur_frame->buf, plane, 94 cm->seq_params->use_highbitdepth); 95 96 // Re-instate the unfiltered frame 97 yv12_copy_plane(&cpi->last_frame_uf, &cm->cur_frame->buf, plane); 98 99 return filt_err; 100 } 101 102 static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, 103 int partial_frame, 104 const int *last_frame_filter_level, int plane, 105 int dir, int64_t *best_filter_sse) { 106 const AV1_COMMON *const cm = &cpi->common; 107 const int min_filter_level = 0; 108 const int max_filter_level = get_max_filter_level(cpi); 109 int filt_direction = 0; 110 int64_t best_err; 111 int filt_best; 112 113 // Start the search at the previous frame filter level unless it is now out of 114 // range. 115 int lvl; 116 switch (plane) { 117 case 0: 118 switch (dir) { 119 case 2: 120 lvl = (last_frame_filter_level[0] + last_frame_filter_level[1] + 1) >> 121 1; 122 break; 123 case 0: 124 case 1: lvl = last_frame_filter_level[dir]; break; 125 default: assert(dir >= 0 && dir <= 2); return 0; 126 } 127 break; 128 case 1: lvl = last_frame_filter_level[2]; break; 129 case 2: lvl = last_frame_filter_level[3]; break; 130 default: assert(plane >= 0 && plane <= 2); return 0; 131 } 132 int filt_mid = clamp(lvl, min_filter_level, max_filter_level); 133 int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; 134 // Sum squared error at each filter level 135 int64_t ss_err[MAX_LOOP_FILTER + 1]; 136 137 const int use_coarse_search = cpi->sf.lpf_sf.use_coarse_filter_level_search; 138 assert(use_coarse_search <= 1); 139 static const int min_filter_step_lookup[2] = { 0, 2 }; 140 // min_filter_step_thesh determines the stopping criteria for the search. 
141 // The search is terminated when filter_step equals min_filter_step_thesh. 142 const int min_filter_step_thesh = min_filter_step_lookup[use_coarse_search]; 143 144 // Set each entry to -1 145 memset(ss_err, 0xFF, sizeof(ss_err)); 146 yv12_copy_plane(&cm->cur_frame->buf, &cpi->last_frame_uf, plane); 147 best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir); 148 filt_best = filt_mid; 149 ss_err[filt_mid] = best_err; 150 151 while (filter_step > min_filter_step_thesh) { 152 const int filt_high = AOMMIN(filt_mid + filter_step, max_filter_level); 153 const int filt_low = AOMMAX(filt_mid - filter_step, min_filter_level); 154 155 // Bias against raising loop filter in favor of lowering it. 156 int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; 157 158 if ((is_stat_consumption_stage_twopass(cpi)) && 159 (cpi->ppi->twopass.section_intra_rating < 20)) 160 bias = (bias * cpi->ppi->twopass.section_intra_rating) / 20; 161 162 // yx, bias less for large block size 163 if (cm->features.tx_mode != ONLY_4X4) bias >>= 1; 164 165 if (filt_direction <= 0 && filt_low != filt_mid) { 166 // Get Low filter error score 167 if (ss_err[filt_low] < 0) { 168 ss_err[filt_low] = 169 try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir); 170 } 171 // If value is close to the best so far then bias towards a lower loop 172 // filter value. 173 if (ss_err[filt_low] < (best_err + bias)) { 174 // Was it actually better than the previous best? 
175 if (ss_err[filt_low] < best_err) { 176 best_err = ss_err[filt_low]; 177 } 178 filt_best = filt_low; 179 } 180 } 181 182 // Now look at filt_high 183 if (filt_direction >= 0 && filt_high != filt_mid) { 184 if (ss_err[filt_high] < 0) { 185 ss_err[filt_high] = 186 try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir); 187 } 188 // If value is significantly better than previous best, bias added against 189 // raising filter value 190 if (ss_err[filt_high] < (best_err - bias)) { 191 best_err = ss_err[filt_high]; 192 filt_best = filt_high; 193 } 194 } 195 196 // Half the step distance if the best filter value was the same as last time 197 if (filt_best == filt_mid) { 198 filter_step /= 2; 199 filt_direction = 0; 200 } else { 201 filt_direction = (filt_best < filt_mid) ? -1 : 1; 202 filt_mid = filt_best; 203 } 204 } 205 206 *best_filter_sse = ss_err[filt_best]; 207 208 return filt_best; 209 } 210 211 void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, 212 LPF_PICK_METHOD method) { 213 AV1_COMMON *const cm = &cpi->common; 214 const SequenceHeader *const seq_params = cm->seq_params; 215 const int num_planes = av1_num_planes(cm); 216 struct loopfilter *const lf = &cm->lf; 217 int disable_filter_rt_screen = 0; 218 (void)sd; 219 220 // Enable loop filter sharpness only for allintra encoding mode, 221 // as frames do not have to serve as references to others 222 lf->sharpness_level = 223 cpi->oxcf.mode == ALLINTRA ? cpi->oxcf.algo_cfg.sharpness : 0; 224 225 if (cpi->oxcf.algo_cfg.enable_adaptive_sharpness) { 226 // Loop filter sharpness levels are highly nonlinear. Visually, lf sharpness 227 // 1 is closer to 7 than it is to 0, so in practice adaptive sharpness is 228 // written to pick levels 0, 1 and 7 to keep it simple. 
229 int max_lf_sharpness; 230 231 if (cm->quant_params.base_qindex <= 120) { 232 max_lf_sharpness = 7; 233 } else if (cm->quant_params.base_qindex <= 160) { 234 max_lf_sharpness = 1; 235 } else { 236 max_lf_sharpness = 0; 237 } 238 239 lf->sharpness_level = AOMMIN(lf->sharpness_level, max_lf_sharpness); 240 } 241 242 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && 243 cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && 244 cpi->sf.rt_sf.skip_lf_screen) 245 disable_filter_rt_screen = av1_cyclic_refresh_disable_lf_cdef(cpi); 246 247 if (disable_filter_rt_screen || 248 cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_NONE || 249 (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_REFERENCE && 250 cpi->ppi->rtc_ref.non_reference_frame)) { 251 lf->filter_level[0] = 0; 252 lf->filter_level[1] = 0; 253 return; 254 } 255 256 if (method == LPF_PICK_MINIMAL_LPF) { 257 lf->filter_level[0] = 0; 258 lf->filter_level[1] = 0; 259 } else if (method >= LPF_PICK_FROM_Q) { 260 const int min_filter_level = 0; 261 const int max_filter_level = get_max_filter_level(cpi); 262 const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, 263 seq_params->bit_depth); 264 // based on tests result for rtc test set 265 // 0.04590 boosted or 0.02295 non-booseted in 18-bit fixed point 266 const int strength_boost_q_treshold = 0; 267 int inter_frame_multiplier = 268 (q > strength_boost_q_treshold || 269 (cpi->sf.rt_sf.use_nonrd_pick_mode && 270 cpi->common.width * cpi->common.height > 352 * 288)) 271 ? 12034 272 : 6017; 273 // Increase strength on base TL0 for temporal layers, for low-resoln, 274 // based on frame source_sad. 
275 if (cpi->svc.number_temporal_layers > 1 && 276 cpi->svc.temporal_layer_id == 0 && 277 cpi->common.width * cpi->common.height <= 352 * 288 && 278 cpi->sf.rt_sf.use_nonrd_pick_mode) { 279 if (cpi->rc.frame_source_sad > 100000) 280 inter_frame_multiplier = inter_frame_multiplier << 1; 281 else if (cpi->rc.frame_source_sad > 50000) 282 inter_frame_multiplier = 3 * (inter_frame_multiplier >> 1); 283 } else if (cpi->sf.rt_sf.use_fast_fixed_part) { 284 inter_frame_multiplier = inter_frame_multiplier << 1; 285 } 286 // These values were determined by linear fitting the result of the 287 // searched level for 8 bit depth: 288 // Keyframes: filt_guess = q * 0.06699 - 1.60817 289 // Other frames: filt_guess = q * inter_frame_multiplier + 2.48225 290 // 291 // And high bit depth separately: 292 // filt_guess = q * 0.316206 + 3.87252 293 int filt_guess; 294 switch (seq_params->bit_depth) { 295 case AOM_BITS_8: 296 filt_guess = 297 (cm->current_frame.frame_type == KEY_FRAME) 298 ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18) 299 : ROUND_POWER_OF_TWO(q * inter_frame_multiplier + 650707, 18); 300 break; 301 case AOM_BITS_10: 302 filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); 303 break; 304 case AOM_BITS_12: 305 filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); 306 break; 307 default: 308 assert(0 && 309 "bit_depth should be AOM_BITS_8, AOM_BITS_10 " 310 "or AOM_BITS_12"); 311 return; 312 } 313 if (seq_params->bit_depth != AOM_BITS_8 && 314 cm->current_frame.frame_type == KEY_FRAME) 315 filt_guess -= 4; 316 // TODO(chengchen): retrain the model for Y, U, V filter levels 317 lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level); 318 lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level); 319 lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level); 320 lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level); 321 if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY && 
322 !frame_is_intra_only(cm) && !cpi->rc.high_source_sad) { 323 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { 324 lf->filter_level[0] = 0; 325 lf->filter_level[1] = 0; 326 } else { 327 const int num4x4 = (cm->width >> 2) * (cm->height >> 2); 328 const int newmv_thresh = 7; 329 const int distance_since_key_thresh = 5; 330 if ((cpi->td.rd_counts.newmv_or_intra_blocks * 100 / num4x4) < 331 newmv_thresh && 332 cpi->rc.frames_since_key > distance_since_key_thresh) { 333 lf->filter_level[0] = 0; 334 lf->filter_level[1] = 0; 335 } 336 } 337 } 338 } else { 339 int last_frame_filter_level[4] = { 0 }; 340 if (!frame_is_intra_only(cm)) { 341 last_frame_filter_level[0] = cpi->ppi->filter_level[0]; 342 last_frame_filter_level[1] = cpi->ppi->filter_level[1]; 343 last_frame_filter_level[2] = cpi->ppi->filter_level_u; 344 last_frame_filter_level[3] = cpi->ppi->filter_level_v; 345 } 346 // The frame buffer last_frame_uf is used to store the non-loop filtered 347 // reconstructed frame in search_filter_level(). 
348 if (aom_realloc_frame_buffer( 349 &cpi->last_frame_uf, cm->width, cm->height, 350 seq_params->subsampling_x, seq_params->subsampling_y, 351 seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels, 352 cm->features.byte_alignment, NULL, NULL, NULL, false, 0)) 353 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, 354 "Failed to allocate last frame buffer"); 355 356 int64_t zero_filter_sse[MAX_MB_PLANE] = { 0 }; 357 int64_t best_filter_sse[MAX_MB_PLANE] = { 0 }; 358 359 if (cpi->sf.lpf_sf.skip_loop_filter_using_filt_error >= 1) { 360 for (int plane = 0; plane < num_planes; plane++) { 361 zero_filter_sse[plane] = aom_get_sse_plane( 362 sd, &cm->cur_frame->buf, plane, cm->seq_params->use_highbitdepth); 363 } 364 } 365 366 lf->filter_level[0] = lf->filter_level[1] = 367 search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, 368 last_frame_filter_level, 0, 2, &best_filter_sse[0]); 369 if (method != LPF_PICK_FROM_FULL_IMAGE_NON_DUAL) { 370 lf->filter_level[0] = search_filter_level( 371 sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, last_frame_filter_level, 0, 372 0, &best_filter_sse[0]); 373 lf->filter_level[1] = search_filter_level( 374 sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, last_frame_filter_level, 0, 375 1, &best_filter_sse[0]); 376 } 377 378 if (num_planes > 1) { 379 lf->filter_level_u = search_filter_level( 380 sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, last_frame_filter_level, 1, 381 0, &best_filter_sse[1]); 382 lf->filter_level_v = search_filter_level( 383 sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, last_frame_filter_level, 2, 384 0, &best_filter_sse[2]); 385 } 386 387 lf->backup_filter_level[0] = lf->filter_level[0]; 388 lf->backup_filter_level[1] = lf->filter_level[1]; 389 lf->backup_filter_level_u = lf->filter_level_u; 390 lf->backup_filter_level_v = lf->filter_level_v; 391 392 if (cpi->sf.lpf_sf.adaptive_luma_loop_filter_skip >= 1) { 393 int32_t min_ref_filter_level[2] = { MAX_LOOP_FILTER, MAX_LOOP_FILTER }; 394 // Find the minimum luma filter 
levels across all reference frames. 395 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) { 396 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref); 397 if (buf == NULL) continue; 398 399 if (buf->filter_level[0] != -1) 400 min_ref_filter_level[0] = 401 AOMMIN(min_ref_filter_level[0], buf->filter_level[0]); 402 if (buf->filter_level[1] != -1) 403 min_ref_filter_level[1] = 404 AOMMIN(min_ref_filter_level[1], buf->filter_level[1]); 405 } 406 407 // Reset luma filter levels to zero based on minimum filter levels of 408 // reference frames and current frame's pyramid level. 409 unsigned int pyramid_level = cm->current_frame.pyramid_level; 410 if (pyramid_level > 1) { 411 int filter_threshold; 412 if (pyramid_level >= 5) 413 filter_threshold = 32; 414 else if (pyramid_level >= 4) 415 filter_threshold = 16; 416 else 417 filter_threshold = 8; 418 419 const bool reset_filter_level_y = 420 lf->filter_level[0] < filter_threshold && 421 lf->filter_level[1] < filter_threshold && 422 lf->filter_level_u < filter_threshold && 423 lf->filter_level_v < filter_threshold && 424 min_ref_filter_level[0] == 0 && min_ref_filter_level[1] == 0; 425 if (reset_filter_level_y) { 426 lf->filter_level[0] = 0; 427 lf->filter_level[1] = 0; 428 } 429 } 430 } 431 432 if (lf->filter_level[0] != 0 && lf->filter_level[1] != 0 && 433 cpi->sf.lpf_sf.skip_loop_filter_using_filt_error >= 1) { 434 const double pct_improvement_thresh = 2.0; 435 bool reset_filter_level_y = true; 436 437 // Calculate the percentage improvement in SSE for each plane. This 438 // measures the relative reduction in error when applying the filter 439 // compared to no filtering. 
440 for (int plane = 0; plane < num_planes; plane++) { 441 const double pct_improvement_sse = 442 ((zero_filter_sse[plane] - best_filter_sse[plane]) * 100.0) / 443 zero_filter_sse[plane]; 444 reset_filter_level_y &= pct_improvement_sse < pct_improvement_thresh; 445 } 446 447 if (reset_filter_level_y) { 448 lf->filter_level[0] = 0; 449 lf->filter_level[1] = 0; 450 } 451 } 452 453 // Store the current frame's filter levels to be referenced 454 // while determining the minimum filter level from reference frames. 455 cm->cur_frame->filter_level[0] = lf->filter_level[0]; 456 cm->cur_frame->filter_level[1] = lf->filter_level[1]; 457 } 458 }