encodemv.c (12779B)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "av1/common/common.h"
#include "av1/common/entropymode.h"

#include "av1/encoder/cost.h"
#include "av1/encoder/encodemv.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"

// Accumulates entropy-coding statistics (CDF updates) for one nonzero MV
// component |comp| (a row or column difference). The magnitude is split into
// a class, integer bits, 2 fractional bits and 1 high-precision bit --
// exactly the decomposition written by encode_mv_component() below, so the
// two functions must stay in sync.
static void update_mv_component_stats(int comp, nmv_component *mvcomp,
                                      MvSubpelPrecision precision) {
  assert(comp != 0);
  int offset;
  const int sign = comp < 0;
  const int mag = sign ? -comp : comp;
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
  const int d = offset >> 3;         // int mv data
  const int fr = (offset >> 1) & 3;  // fractional mv data
  const int hp = offset & 1;         // high precision mv data

  // Sign
  update_cdf(mvcomp->sign_cdf, sign, 2);

  // Class
  update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);

  // Integer bits: class 0 codes the integer part as a single symbol,
  // larger classes code it bit by bit.
  if (mv_class == MV_CLASS_0) {
    update_cdf(mvcomp->class0_cdf, d, CLASS0_SIZE);
  } else {
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
    for (int i = 0; i < n; ++i)
      update_cdf(mvcomp->bits_cdf[i], (d >> i) & 1, 2);
  }
  // Fractional bits (skipped entirely for integer-only precision)
  if (precision > MV_SUBPEL_NONE) {
    aom_cdf_prob *fp_cdf =
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf;
    update_cdf(fp_cdf, fr, MV_FP_SIZE);
  }

  // High precision bit
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    aom_cdf_prob *hp_cdf =
        mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf;
    update_cdf(hp_cdf, hp, 2);
  }
}

// Updates the joint-type CDF and the per-component CDFs for the MV
// difference (mv - ref). Only the components that the joint type marks as
// nonzero are updated.
void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
                         MvSubpelPrecision precision) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);

  update_cdf(mvctx->joints_cdf, j, MV_JOINTS);

  if (mv_joint_vertical(j))
    update_mv_component_stats(diff.row, &mvctx->comps[0], precision);

  if (mv_joint_horizontal(j))
    update_mv_component_stats(diff.col, &mvctx->comps[1], precision);
}

// Writes one nonzero MV component to the bitstream: sign, magnitude class,
// integer bits, then (precision permitting) fractional and high-precision
// bits. Mirrors update_mv_component_stats() above.
static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
                                MvSubpelPrecision precision) {
  assert(comp != 0);
  int offset;
  const int sign = comp < 0;
  const int mag = sign ? -comp : comp;
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
  const int d = offset >> 3;         // int mv data
  const int fr = (offset >> 1) & 3;  // fractional mv data
  const int hp = offset & 1;         // high precision mv data

  // Sign
  aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);

  // Class
  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);

  // Integer bits
  if (mv_class == MV_CLASS_0) {
    aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
  } else {
    int i;
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
    for (i = 0; i < n; ++i)
      aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
  }
  // Fractional bits
  if (precision > MV_SUBPEL_NONE) {
    aom_write_symbol(
        w, fr,
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
        MV_FP_SIZE);
  }

  // High precision bit
  if (precision > MV_SUBPEL_LOW_PRECISION)
    aom_write_symbol(
        w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
        2);
}

/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
 * is more than most L1D caches and is a significant chunk of L2.
 * Write SIMD that uses streaming writes to avoid loading all of that into L1,
 * or just don't update the larger component costs every time this is called
 * (or both).
 */

// Fills mvcost[] with the rate cost of every representable value of one MV
// component. The table is indexed by the signed component value: for each
// positive v both mvcost[v] and mvcost[-v] are written, and mvcost[0] = 0.
void av1_build_nmv_component_cost_table(int *mvcost,
                                        const nmv_component *const mvcomp,
                                        MvSubpelPrecision precision) {
  int i, j, v, o, mantissa;
  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
  int bits_cost[MV_OFFSET_BITS][2];
  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
      fp_cost[MV_FP_SIZE] = { 0 };
  int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };

  // Per-symbol costs derived from the current CDFs.
  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
  }

  // Fractional/high-precision costs stay zero when the precision does not
  // code those bits (arrays are zero-initialized above).
  if (precision > MV_SUBPEL_NONE) {
    for (i = 0; i < CLASS0_SIZE; ++i)
      av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
                               NULL);
    av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
  }

  if (precision > MV_SUBPEL_LOW_PRECISION) {
    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
  }

  // Instead of accumulating the cost of each vector component's bits
  // individually, compute the costs based on smaller vectors. Costs for
  // [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
  // respectively. Offsets are maintained to swap both 1) class costs when
  // treated as a complete vector component with the highest set bit when
  // treated as a mantissa (significand) and 2) leading zeros to account for
  // the current exponent.

  // Cost offsets
  int cost_swap[MV_OFFSET_BITS] = { 0 };
  // Delta to convert positive vector to negative vector costs
  int negate_sign = sign_cost[1] - sign_cost[0];

  // Initialize with offsets to swap the class costs with the costs of the
  // highest set bit.
  for (i = 1; i < MV_OFFSET_BITS; ++i) {
    cost_swap[i] = bits_cost[i - 1][1];
    if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
  }

  // Seed the fractional costs onto the output (overwritten later by the
  // class0 pass at the bottom of this function).
  for (o = 0; o < MV_FP_SIZE; ++o) {
    int hp;
    for (hp = 0; hp < 2; ++hp) {
      v = 2 * o + hp + 1;
      mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
    }
  }

  mvcost[0] = 0;
  // Fill the costs for each exponent's vectors, using the costs set in the
  // previous exponents.
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    const int exponent = (2 * MV_FP_SIZE) << i;

    int class = 0;
    if (i >= CLASS0_BITS) {
      class = class_cost[i - CLASS0_BITS + 1];
    }

    // Iterate through mantissas, keeping track of the location
    // of the highest set bit for the mantissa.
    // To be clear: in the outer loop, the position of the highest set bit
    // (exponent) is tracked and, in this loop, the highest set bit of the
    // mantissa is tracked.
    mantissa = 0;
    for (j = 0; j <= i; ++j) {
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
      cost_swap[j] += bits_cost[i][0];
    }
  }

  // Special case to avoid buffer overrun
  {
    int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
    int class = class_cost[MV_CLASSES - 1];
    mantissa = 0;
    for (j = 0; j < MV_OFFSET_BITS; ++j) {
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
    }
    // At this point: mantissa = exponent >> 1

    // Manually calculate the final cost offset
    int cost_swap_hi =
        bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
    for (; mantissa < exponent - 1; ++mantissa) {
      int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
      v = exponent + mantissa + 1;
      mvcost[v] = cost;
      mvcost[-v] = cost + negate_sign;
    }
  }

  // Fill costs for class0 vectors, overwriting previous placeholder values
  // used for calculating the costs of the larger vectors.
  for (i = 0; i < CLASS0_SIZE; ++i) {
    const int top = i * 2 * MV_FP_SIZE;
    for (o = 0; o < MV_FP_SIZE; ++o) {
      int hp;
      int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
      for (hp = 0; hp < 2; ++hp) {
        v = top + 2 * o + hp + 1;
        mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
        mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
      }
    }
  }
}

// Writes the MV difference (mv - ref) for the current block to the
// bitstream. When the auto_mv_step_size speed feature is enabled, also
// records the largest full-pel component magnitude seen so far in
// td->max_mv_magnitude.
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
                   const MV *ref, nmv_context *mvctx, int usehp) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
  // If the mv_diff is zero, then we should have used near or nearest instead.
  assert(j != MV_JOINT_ZERO);
  if (cpi->common.features.cur_frame_force_integer_mv) {
    usehp = MV_SUBPEL_NONE;
  }
  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
  if (mv_joint_vertical(j))
    encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);

  if (mv_joint_horizontal(j))
    encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);

  // If auto_mv_step_size is enabled then keep track of the largest
  // motion vector component used.
  if (cpi->sf.mv_sf.auto_mv_step_size) {
    int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude);
  }
}

// Writes a displacement vector (DV) difference. DVs are coded at full-pel
// resolution only: both components are asserted to be multiples of 8 and no
// fractional or high-precision bits are emitted (MV_SUBPEL_NONE).
void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
                   nmv_context *mvctx) {
  // DV and ref DV should not have sub-pel.
  assert((mv->col & 7) == 0);
  assert((mv->row & 7) == 0);
  assert((ref->col & 7) == 0);
  assert((ref->row & 7) == 0);
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);

  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
  if (mv_joint_vertical(j))
    encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);

  if (mv_joint_horizontal(j))
    encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
}

// Builds the joint-type cost table and the per-component cost tables for
// both the row (mvcost[0]) and column (mvcost[1]) components.
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
                              const nmv_context *ctx,
                              MvSubpelPrecision precision) {
  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
  av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
  av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
}

// Returns the reference MV at |ref_mv_idx| from the candidate MV stack for
// |ref_frame|. For compound prediction, |ref_idx| selects this_mv (0) or
// comp_mv (1); for single prediction it falls back to the global MV when the
// stack has fewer than ref_mv_idx + 1 entries.
int_mv av1_get_ref_mv_from_stack(int ref_idx,
                                 const MV_REFERENCE_FRAME *ref_frame,
                                 int ref_mv_idx,
                                 const MB_MODE_INFO_EXT *mbmi_ext) {
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const CANDIDATE_MV *curr_ref_mv_stack =
      mbmi_ext->ref_mv_stack[ref_frame_type];

  if (ref_frame[1] > INTRA_FRAME) {
    // Compound reference: pick the half of the candidate matching ref_idx.
    assert(ref_idx == 0 || ref_idx == 1);
    return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
  }

  assert(ref_idx == 0);
  return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
             ? curr_ref_mv_stack[ref_mv_idx].this_mv
             : mbmi_ext->global_mvs[ref_frame_type];
}

// Returns the reference MV for |ref_idx| of the current block.
// NEAR_NEWMV/NEW_NEARMV bump the stack index by one -- presumably because
// the NEW component pairs with the next candidate; confirm against the
// mode-selection code.
int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  int ref_mv_idx = mbmi->ref_mv_idx;
  if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
    assert(has_second_ref(mbmi));
    ref_mv_idx += 1;
  }
  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
                                   &x->mbmi_ext);
}

// Fetches candidate-stack entries 0 and 1 for a single reference frame as
// the nearest/near reference MVs, then clamps each to the allowed precision
// via lower_mv_precision().
void av1_find_best_ref_mvs_from_stack(int allow_hp,
                                      const MB_MODE_INFO_EXT *mbmi_ext,
                                      MV_REFERENCE_FRAME ref_frame,
                                      int_mv *nearest_mv, int_mv *near_mv,
                                      int is_integer) {
  const int ref_idx = 0;
  MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
  lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
  *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
  lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
}