aq_variance.c (11525B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <math.h> 13 #include <stdlib.h> 14 15 #include "aom_dsp/aom_dsp_common.h" 16 #include "aom_ports/mem.h" 17 18 #include "av1/encoder/aq_variance.h" 19 #include "av1/common/seg_common.h" 20 #include "av1/encoder/encodeframe.h" 21 #include "av1/encoder/ratectrl.h" 22 #include "av1/encoder/rd.h" 23 #include "av1/encoder/segmentation.h" 24 #include "av1/encoder/dwt.h" 25 #include "config/aom_config.h" 26 27 #if !CONFIG_REALTIME_ONLY 28 static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0, 29 0.9, .8, .7, .6 }; 30 31 static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0, 32 0.75, 1.0, 1.0, 1.0 }; 33 #define ENERGY_MIN (-4) 34 #define ENERGY_MAX (1) 35 #define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) 36 #define ENERGY_IN_BOUNDS(energy) \ 37 assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) 38 39 static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 }; 40 41 #define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] 42 43 void av1_vaq_frame_setup(AV1_COMP *cpi) { 44 AV1_COMMON *cm = &cpi->common; 45 const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame; 46 const int base_qindex = cm->quant_params.base_qindex; 47 struct segmentation *seg = &cm->seg; 48 int i; 49 50 int resolution_change = 51 cm->prev_frame && (cm->width != cm->prev_frame->width || 52 cm->height != cm->prev_frame->height); 53 int avg_energy = (int)(cpi->twopass_frame.mb_av_energy - 2); 54 double avg_ratio; 55 if (avg_energy > 7) avg_energy = 7; 56 if (avg_energy < 0) avg_energy = 0; 57 avg_ratio = rate_ratio[avg_energy]; 58 59 if (resolution_change) { 60 memset(cpi->enc_seg.map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols); 61 av1_clearall_segfeatures(seg); 62 av1_disable_segmentation(seg); 63 return; 64 } 65 if (frame_is_intra_only(cm) || cm->features.error_resilient_mode || 66 refresh_frame->alt_ref_frame || 67 (refresh_frame->golden_frame && !cpi->rc.is_src_frame_alt_ref)) { 68 cpi->vaq_refresh = 1; 69 70 av1_enable_segmentation(seg); 71 av1_clearall_segfeatures(seg); 72 73 for (i = 0; i < MAX_SEGMENTS; ++i) { 74 // Set up avg segment id to be 1.0 and adjust the other segments around 75 // it. 76 int qindex_delta = 77 av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type, 78 base_qindex, rate_ratio[i] / avg_ratio); 79 80 // We don't allow qindex 0 in a segment if the base value is not 0. 81 // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment 82 // Q delta is sometimes applied without going back around the rd loop. 83 // This could lead to an illegal combination of partition size and q. 84 if ((base_qindex != 0) && ((base_qindex + qindex_delta) == 0)) { 85 qindex_delta = -base_qindex + 1; 86 } 87 88 av1_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); 89 av1_enable_segfeature(seg, i, SEG_LVL_ALT_Q); 90 } 91 } 92 } 93 94 int av1_log_block_avg(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs, 95 int mi_row, int mi_col) { 96 // This functions returns the block average of luma block 97 unsigned int sum, avg, num_pix; 98 int r, c; 99 const int pic_w = cpi->common.width; 100 const int pic_h = cpi->common.height; 101 const int bw = MI_SIZE * mi_size_wide[bs]; 102 const int bh = MI_SIZE * mi_size_high[bs]; 103 const uint16_t *x16 = CONVERT_TO_SHORTPTR(x->plane[0].src.buf); 104 105 sum = 0; 106 num_pix = 0; 107 avg = 0; 108 int row = mi_row << MI_SIZE_LOG2; 109 int col = mi_col << MI_SIZE_LOG2; 110 for (r = row; (r < (row + bh)) && (r < pic_h); r++) { 111 for (c = col; (c < (col + bw)) && (c < pic_w); c++) { 112 sum += *(x16 + r * x->plane[0].src.stride + c); 113 num_pix++; 114 } 115 } 116 if (num_pix != 0) { 117 avg = sum / num_pix; 118 } 119 return avg; 120 } 121 122 #define DEFAULT_E_MIDPOINT 10.0 123 124 static unsigned int haar_ac_energy(const MACROBLOCK *x, BLOCK_SIZE bs) { 125 const MACROBLOCKD *xd = &x->e_mbd; 126 int stride = x->plane[0].src.stride; 127 const uint8_t *buf = x->plane[0].src.buf; 128 const int num_8x8_cols = block_size_wide[bs] / 8; 129 const int num_8x8_rows = block_size_high[bs] / 8; 130 const int hbd = is_cur_buf_hbd(xd); 131 132 int64_t var = av1_haar_ac_sad_mxn_uint8_input(buf, stride, hbd, num_8x8_rows, 133 num_8x8_cols); 134 135 return (unsigned int)((uint64_t)var * 256) >> num_pels_log2_lookup[bs]; 136 } 137 138 static double log_block_wavelet_energy(const MACROBLOCK *x, BLOCK_SIZE bs) { 139 unsigned int haar_sad = haar_ac_energy(x, bs); 140 return log1p(haar_sad); 141 } 142 143 int av1_block_wavelet_energy_level(const AV1_COMP *cpi, const MACROBLOCK *x, 144 BLOCK_SIZE bs) { 145 double energy, energy_midpoint; 146 energy_midpoint = (is_stat_consumption_stage_twopass(cpi)) 147 ? cpi->twopass_frame.frame_avg_haar_energy 148 : DEFAULT_E_MIDPOINT; 149 energy = log_block_wavelet_energy(x, bs) - energy_midpoint; 150 return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX); 151 } 152 153 int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi, 154 int block_var_level) { 155 int rate_level; 156 const AV1_COMMON *const cm = &cpi->common; 157 158 if (DELTA_Q_PERCEPTUAL_MODULATION == 1) { 159 ENERGY_IN_BOUNDS(block_var_level); 160 rate_level = SEGMENT_ID(block_var_level); 161 } else { 162 rate_level = block_var_level; 163 } 164 const int base_qindex = cm->quant_params.base_qindex; 165 int qindex_delta = 166 av1_compute_qdelta_by_rate(cpi, cm->current_frame.frame_type, base_qindex, 167 deltaq_rate_ratio[rate_level]); 168 169 if ((base_qindex != 0) && ((base_qindex + qindex_delta) == 0)) { 170 qindex_delta = -base_qindex + 1; 171 } 172 return base_qindex + qindex_delta; 173 } 174 175 // Comparer used by qsort() to order an array of unsigned int from smallest to 176 // largest. 177 static int comp_unsigned_int(const void *a, const void *b) { 178 unsigned int arg1 = *(const unsigned int *)a; 179 unsigned int arg2 = *(const unsigned int *)b; 180 181 return (arg1 > arg2) - (arg1 < arg2); 182 } 183 184 unsigned int av1_get_variance_boost_block_variance(const AV1_COMP *cpi, 185 const MACROBLOCK *x) { 186 #define SUPERBLOCK_SIZE 64 187 #define SUBBLOCK_SIZE 8 188 #define SUBBLOCKS_IN_SB_DIM (SUPERBLOCK_SIZE / SUBBLOCK_SIZE) 189 #define SUBBLOCKS_IN_SB (SUBBLOCKS_IN_SB_DIM * SUBBLOCKS_IN_SB_DIM) 190 #define SUBBLOCKS_IN_OCTILE (SUBBLOCKS_IN_SB / 8) 191 DECLARE_ALIGNED(16, static const uint16_t, 192 av1_highbd_all_zeros[SUBBLOCK_SIZE]) = { 0 }; 193 DECLARE_ALIGNED(16, static const uint8_t, 194 av1_all_zeros[SUBBLOCK_SIZE]) = { 0 }; 195 196 const MACROBLOCKD *xd = &x->e_mbd; 197 unsigned int sse; 198 // Octile is currently hard-coded and optimized for still pictures. In the 199 // future, we might want to expose this as a parameter that can be fine-tuned 200 // by the caller. 201 // An octile of 5 was chosen because it was found to strike the best balance 202 // between quality and consistency. Lower octiles tend to score lower in 203 // SSIMU2, while higher octiles tend to harm subjective quality consistency, 204 // especially in <1 MP images. 205 const int octile = 5; 206 const uint8_t *all_zeros = is_cur_buf_hbd(xd) 207 ? CONVERT_TO_BYTEPTR(av1_highbd_all_zeros) 208 : av1_all_zeros; 209 unsigned int variances[SUBBLOCKS_IN_SB]; 210 211 // Calculate subblock variances. 212 aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_8X8].vf; 213 for (int subb_i = 0; subb_i < SUBBLOCKS_IN_SB_DIM; subb_i++) { 214 int i = subb_i * SUBBLOCK_SIZE; 215 for (int subb_j = 0; subb_j < SUBBLOCKS_IN_SB_DIM; subb_j++) { 216 int j = subb_j * SUBBLOCK_SIZE; 217 // Truncating values to integers (i.e. the 64 term) was found to perform 218 // better than rounding, or returning them as doubles. 219 variances[subb_i * SUBBLOCKS_IN_SB_DIM + subb_j] = 220 vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j, 221 x->plane[0].src.stride, all_zeros, 0, &sse) / 222 64; 223 } 224 } 225 226 // Order the 8x8 SB values from smallest to largest variance. 227 qsort(variances, SUBBLOCKS_IN_SB, sizeof(unsigned int), comp_unsigned_int); 228 229 // Sample three 8x8 variance values: at the specified octile, previous octile, 230 // and next octile. Make sure we use the last subblock in each octile as the 231 // representative of the octile. 232 assert(octile >= 1 && octile <= 8); 233 const int middle_index = octile * SUBBLOCKS_IN_OCTILE - 1; 234 const int lower_index = 235 AOMMAX(SUBBLOCKS_IN_OCTILE - 1, middle_index - SUBBLOCKS_IN_OCTILE); 236 const int upper_index = 237 AOMMIN(SUBBLOCKS_IN_SB - 1, middle_index + SUBBLOCKS_IN_OCTILE); 238 239 // Weigh the three variances in a 1:2:1 ratio, with rounding (the +2 term). 240 // This allows for smoother delta-q transitions among superblocks with 241 // mixed-variance features. 242 const unsigned int variance = 243 (variances[lower_index] + (variances[middle_index] * 2) + 244 variances[upper_index] + 2) / 245 4; 246 247 return variance; 248 } 249 #endif // !CONFIG_REALTIME_ONLY 250 251 int av1_log_block_var(const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bs) { 252 DECLARE_ALIGNED(16, static const uint16_t, 253 av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 }; 254 DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 }; 255 256 // This function returns a score for the blocks local variance as calculated 257 // by: sum of the log of the (4x4 variances) of each subblock to the current 258 // block (x,bs) 259 // * 32 / number of pixels in the block_size. 260 // This is used for segmentation because to avoid situations in which a large 261 // block with a gentle gradient gets marked high variance even though each 262 // subblock has a low variance. This allows us to assign the same segment 263 // number for the same sorts of area regardless of how the partitioning goes. 264 265 const MACROBLOCKD *xd = &x->e_mbd; 266 double var = 0; 267 unsigned int sse; 268 int i, j; 269 270 int right_overflow = 271 (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; 272 int bottom_overflow = 273 (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; 274 275 const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow; 276 const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow; 277 278 aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf; 279 for (i = 0; i < bh; i += 4) { 280 for (j = 0; j < bw; j += 4) { 281 if (is_cur_buf_hbd(xd)) { 282 var += log1p(vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j, 283 x->plane[0].src.stride, 284 CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) / 285 16.0); 286 } else { 287 var += log1p(vf(x->plane[0].src.buf + i * x->plane[0].src.stride + j, 288 x->plane[0].src.stride, av1_all_zeros, 0, &sse) / 289 16.0); 290 } 291 } 292 } 293 // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561. 294 var /= (bw / 4 * bh / 4); 295 if (var > 7) var = 7; 296 297 return (int)(var); 298 }