vp8.c (109758B)
1 /* 2 * VP7/VP8 compatible video decoder 3 * 4 * Copyright (C) 2010 David Conrad 5 * Copyright (C) 2010 Ronald S. Bultje 6 * Copyright (C) 2010 Fiona Glaser 7 * Copyright (C) 2012 Daniel Kang 8 * Copyright (C) 2014 Peter Ross 9 * 10 * This file is part of FFmpeg. 11 * 12 * FFmpeg is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2.1 of the License, or (at your option) any later version. 16 * 17 * FFmpeg is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with FFmpeg; if not, write to the Free Software 24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 25 */ 26 27 #include "config_components.h" 28 29 #include "libavutil/mem.h" 30 #include "libavutil/mem_internal.h" 31 32 #include "avcodec.h" 33 #include "codec_internal.h" 34 #include "decode.h" 35 #include "hwaccel_internal.h" 36 #include "hwconfig.h" 37 #include "mathops.h" 38 #include "progressframe.h" 39 #include "libavutil/refstruct.h" 40 #include "thread.h" 41 #include "vp8.h" 42 #include "vp89_rac.h" 43 #include "vp8data.h" 44 #include "vpx_rac.h" 45 46 #if ARCH_ARM 47 # include "arm/vp8.h" 48 #endif 49 50 // fixme: add 1 bit to all the calls to this? 
/**
 * Read a sign-and-magnitude value: one flag bit, then the magnitude in
 * 'bits' bits, then a sign bit.  Returns 0 (consuming only the flag bit)
 * when the flag bit is clear.
 */
static int vp8_rac_get_sint(VPXRangeCoder *c, int bits)
{
    int v;

    if (!vp89_rac_get(c))
        return 0;

    v = vp89_rac_get_uint(c, bits);

    // trailing bit selects the sign
    if (vp89_rac_get(c))
        v = -v;

    return v;
}

// Read a 7-bit value and double it, mapping a zero result to 1, so the
// return value is never 0 (used for MV probability updates).
static int vp8_rac_get_nn(VPXRangeCoder *c)
{
    int v = vp89_rac_get_uint(c, 7) << 1;
    return v + !v;
}

// DCTextra: accumulate extra DCT coefficient magnitude bits, one bit per
// entry of the zero-terminated probability table 'prob'.
static int vp8_rac_get_coeff(VPXRangeCoder *c, const uint8_t *prob)
{
    int v = 0;

    do {
        v = (v<<1) + vpx_rac_get_prob(c, *prob++);
    } while (*prob);

    return v;
}

// Free all per-context buffers (per-thread data with their locks/conds,
// macroblock arrays and top-row caches) and clear the derived
// 'macroblocks' pointer.
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

// Allocate the frame buffer, the segmentation map and (when a hwaccel is
// active) its per-frame private data for 'f'; on failure everything
// allocated here is released again before returning the error.
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret = ff_progress_frame_get_buffer(s->avctx, &f->tf,
                                           ref ?
                                           AV_GET_BUFFER_FLAG_REF : 0);
    if (ret < 0)
        return ret;
    f->seg_map = av_refstruct_allocz(s->mb_width * s->mb_height);
    if (!f->seg_map) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    ret = ff_hwaccel_frame_priv_alloc(s->avctx, &f->hwaccel_picture_private);
    if (ret < 0)
        goto fail;

    return 0;

fail:
    av_refstruct_unref(&f->seg_map);
    ff_progress_frame_unref(&f->tf);
    return ret;
}

// Release everything vp8_alloc_frame() set up on 'f'.
static void vp8_release_frame(VP8Frame *f)
{
    av_refstruct_unref(&f->seg_map);
    av_refstruct_unref(&f->hwaccel_picture_private);
    ff_progress_frame_unref(&f->tf);
}

// Drop all frames and reference pointers; additionally free the context
// buffers when 'free_mem' is set, and forward the flush to the hwaccel if
// it provides a callback.
static av_cold void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(&s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);

    if (FF_HW_HAS_CB(avctx, flush))
        FF_HW_SIMPLE_CALL(avctx, flush);
}

static av_cold void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

// Return a frame slot not currently referenced as CURRENT, PREVIOUS,
// GOLDEN or ALTREF.  With 5 slots and 4 possible references one slot must
// always be free, so exhaustion is an internal invariant violation --
// hence the abort().  Any stale buffer in the chosen slot is released.
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP8_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP8_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f)
        vp8_release_frame(frame);

    return frame;
}

// Negotiate the output pixel format: offer the compiled-in hwaccel
// formats first and fall back to software YUV420P.
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
AV_PIX_FMT_YUV420P, 189 AV_PIX_FMT_NONE, 190 }; 191 192 return ff_get_format(s->avctx, pix_fmts); 193 } 194 195 static av_always_inline 196 int update_dimensions(VP8Context *s, int width, int height, int is_vp7) 197 { 198 AVCodecContext *avctx = s->avctx; 199 int i, ret, dim_reset = 0; 200 201 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base || 202 height != s->avctx->height) { 203 vp8_decode_flush_impl(s->avctx, 1); 204 205 ret = ff_set_dimensions(s->avctx, width, height); 206 if (ret < 0) 207 return ret; 208 209 dim_reset = (s->macroblocks_base != NULL); 210 } 211 212 if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) && 213 !s->actually_webp && !is_vp7) { 214 s->pix_fmt = get_pixel_format(s); 215 if (s->pix_fmt < 0) 216 return AVERROR(EINVAL); 217 avctx->pix_fmt = s->pix_fmt; 218 } 219 220 s->mb_width = (s->avctx->coded_width + 15) / 16; 221 s->mb_height = (s->avctx->coded_height + 15) / 16; 222 223 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE && 224 avctx->thread_count > 1; 225 if (!s->mb_layout) { // Frame threading and one thread 226 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * 227 sizeof(*s->macroblocks)); 228 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); 229 } else // Sliced threading 230 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * 231 sizeof(*s->macroblocks)); 232 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz)); 233 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); 234 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); 235 236 if (!s->macroblocks_base || !s->top_nnz || !s->top_border || 237 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) { 238 free_buffers(s); 239 return AVERROR(ENOMEM); 240 } 241 242 for (i = 0; i < MAX_THREADS; i++) { 243 s->thread_data[i].filter_strength = 244 av_mallocz(s->mb_width * 
sizeof(*s->thread_data[0].filter_strength)); 245 if (!s->thread_data[i].filter_strength) { 246 free_buffers(s); 247 return AVERROR(ENOMEM); 248 } 249 #if HAVE_THREADS 250 ret = pthread_mutex_init(&s->thread_data[i].lock, NULL); 251 if (ret) { 252 free_buffers(s); 253 return AVERROR(ret); 254 } 255 ret = pthread_cond_init(&s->thread_data[i].cond, NULL); 256 if (ret) { 257 free_buffers(s); 258 return AVERROR(ret); 259 } 260 #endif 261 } 262 263 s->macroblocks = s->macroblocks_base + 1; 264 265 return 0; 266 } 267 268 static int vp7_update_dimensions(VP8Context *s, int width, int height) 269 { 270 return update_dimensions(s, width, height, IS_VP7); 271 } 272 273 static int vp8_update_dimensions(VP8Context *s, int width, int height) 274 { 275 return update_dimensions(s, width, height, IS_VP8); 276 } 277 278 279 static void parse_segment_info(VP8Context *s) 280 { 281 VPXRangeCoder *c = &s->c; 282 int i; 283 284 s->segmentation.update_map = vp89_rac_get(c); 285 s->segmentation.update_feature_data = vp89_rac_get(c); 286 287 if (s->segmentation.update_feature_data) { 288 s->segmentation.absolute_vals = vp89_rac_get(c); 289 290 for (i = 0; i < 4; i++) 291 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); 292 293 for (i = 0; i < 4; i++) 294 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); 295 } 296 if (s->segmentation.update_map) 297 for (i = 0; i < 3; i++) 298 s->prob->segmentid[i] = vp89_rac_get(c) ? 
                                   vp89_rac_get_uint(c, 8) : 255;
}

// Read loop-filter delta adjustments: four per-reference-frame deltas,
// then one delta per mode from MODE_I4x4 through VP8_MVMODE_SPLIT; each
// is sent as an update flag, a 6-bit magnitude and a sign bit.
static void update_lf_deltas(VP8Context *s)
{
    VPXRangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp89_rac_get(c)) {
            s->lf_delta.ref[i] = vp89_rac_get_uint(c, 6);

            if (vp89_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp89_rac_get(c)) {
            s->lf_delta.mode[i] = vp89_rac_get_uint(c, 6);

            if (vp89_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

// Split the remaining buffer into DCT coefficient partitions: a 2-bit
// log2 partition count, then a 24-bit little-endian size for every
// partition except the last (which takes whatever bytes remain), with one
// range decoder initialized per partition.  Returns <0 when the declared
// sizes overrun the buffer.
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp89_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;

    return ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
}

// VP7 quantizer indices: a 7-bit luma AC index, then per-plane overrides
// each sent as a flag plus a 7-bit index (defaulting to yac_qi when the
// flag is clear); indices are mapped through the VP7 lookup tables.
static void vp7_get_quants(VP8Context *s)
{
    VPXRangeCoder *c = &s->c;

    int yac_qi  = vp89_rac_get_uint(c, 7);
    int ydc_qi  = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp89_rac_get(c) ? vp89_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp89_rac_get(c) ?
vp89_rac_get_uint(c, 7) : yac_qi; 366 367 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi]; 368 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi]; 369 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi]; 370 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi]; 371 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132); 372 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi]; 373 } 374 375 static void vp8_get_quants(VP8Context *s) 376 { 377 VPXRangeCoder *c = &s->c; 378 int i, base_qi; 379 380 s->quant.yac_qi = vp89_rac_get_uint(c, 7); 381 s->quant.ydc_delta = vp8_rac_get_sint(c, 4); 382 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4); 383 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4); 384 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4); 385 s->quant.uvac_delta = vp8_rac_get_sint(c, 4); 386 387 for (i = 0; i < 4; i++) { 388 if (s->segmentation.enabled) { 389 base_qi = s->segmentation.base_quant[i]; 390 if (!s->segmentation.absolute_vals) 391 base_qi += s->quant.yac_qi; 392 } else 393 base_qi = s->quant.yac_qi; 394 395 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)]; 396 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; 397 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2; 398 /* 101581>>16 is equivalent to 155/100 */ 399 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16; 400 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)]; 401 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)]; 402 403 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); 404 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); 405 } 406 } 407 408 /** 409 * Determine which buffers golden and altref should be updated with after this frame. 
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP8_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP8_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP8_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP8FrameType ref_to_update(VP8Context *s, int update, VP8FrameType ref)
{
    VPXRangeCoder *c = &s->c;

    if (update)
        return VP8_FRAME_CURRENT;

    switch (vp89_rac_get_uint(c, 2)) {
    case 1:
        return VP8_FRAME_PREVIOUS;
    case 2:
        // cross-copy: golden is refreshed from altref and vice versa
        return (ref == VP8_FRAME_GOLDEN) ? VP8_FRAME_ALTREF : VP8_FRAME_GOLDEN;
    }
    return VP8_FRAME_NONE;
}

// Restore the default token probabilities for all block types and
// coefficient positions (indexed through the coefficient-band table).
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

// Read token probability updates: for each (block type, band, context,
// token) an update flag followed by an 8-bit replacement probability,
// which is written to every coefficient position belonging to that band.
static void vp78_update_probability_tables(VP8Context *s)
{
    VPXRangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vpx_rac_get_prob_branchy(c, ff_vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp89_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

// Per-component MV probability table sizes; VP7 uses fewer entries than
// VP8.
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

// Read optional replacements for the inter 16x16 and chroma 8x8
// prediction-mode probabilities, then the per-component MV probability
// updates.
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VPXRangeCoder *c = &s->c;
    int i, j;

    if (vp89_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] =
vp89_rac_get_uint(c, 8); 474 if (vp89_rac_get(c)) 475 for (i = 0; i < 3; i++) 476 s->prob->pred8x8c[i] = vp89_rac_get_uint(c, 8); 477 478 // 17.2 MV probability update 479 for (i = 0; i < 2; i++) 480 for (j = 0; j < mvc_size; j++) 481 if (vpx_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) 482 s->prob->mvc[i][j] = vp8_rac_get_nn(c); 483 } 484 485 static void update_refs(VP8Context *s) 486 { 487 VPXRangeCoder *c = &s->c; 488 489 int update_golden = vp89_rac_get(c); 490 int update_altref = vp89_rac_get(c); 491 492 s->update_golden = ref_to_update(s, update_golden, VP8_FRAME_GOLDEN); 493 s->update_altref = ref_to_update(s, update_altref, VP8_FRAME_ALTREF); 494 } 495 496 static void copy_chroma(AVFrame *dst, const AVFrame *src, int width, int height) 497 { 498 int i, j; 499 500 for (j = 1; j < 3; j++) { 501 for (i = 0; i < height / 2; i++) 502 memcpy(dst->data[j] + i * dst->linesize[j], 503 src->data[j] + i * src->linesize[j], width / 2); 504 } 505 } 506 507 static void fade(uint8_t *dst, ptrdiff_t dst_linesize, 508 const uint8_t *src, ptrdiff_t src_linesize, 509 int width, int height, 510 int alpha, int beta) 511 { 512 int i, j; 513 for (j = 0; j < height; j++) { 514 const uint8_t *src2 = src + j * src_linesize; 515 uint8_t *dst2 = dst + j * dst_linesize; 516 for (i = 0; i < width; i++) { 517 uint8_t y = src2[i]; 518 dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha); 519 } 520 } 521 } 522 523 static int vp7_fade_frame(VP8Context *s, int alpha, int beta) 524 { 525 int ret; 526 527 if (!s->keyframe && (alpha || beta)) { 528 int width = s->mb_width * 16; 529 int height = s->mb_height * 16; 530 const AVFrame *src; 531 AVFrame *dst; 532 533 if (!s->framep[VP8_FRAME_PREVIOUS] || 534 !s->framep[VP8_FRAME_GOLDEN]) { 535 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); 536 return AVERROR_INVALIDDATA; 537 } 538 539 src = 540 dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f; 541 542 /* preserve the golden frame, write a new previous frame 
*/ 543 if (s->framep[VP8_FRAME_GOLDEN] == s->framep[VP8_FRAME_PREVIOUS]) { 544 s->framep[VP8_FRAME_PREVIOUS] = vp8_find_free_buffer(s); 545 if ((ret = vp8_alloc_frame(s, s->framep[VP8_FRAME_PREVIOUS], 1)) < 0) 546 return ret; 547 548 dst = s->framep[VP8_FRAME_PREVIOUS]->tf.f; 549 550 copy_chroma(dst, src, width, height); 551 } 552 553 fade(dst->data[0], dst->linesize[0], 554 src->data[0], src->linesize[0], 555 width, height, alpha, beta); 556 } 557 558 return 0; 559 } 560 561 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 562 { 563 VPXRangeCoder *c = &s->c; 564 int part1_size, hscale, vscale, i, j, ret; 565 int width = s->avctx->width; 566 int height = s->avctx->height; 567 int alpha = 0; 568 int beta = 0; 569 int fade_present = 1; 570 571 if (buf_size < 4) { 572 return AVERROR_INVALIDDATA; 573 } 574 575 s->profile = (buf[0] >> 1) & 7; 576 if (s->profile > 1) { 577 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile); 578 return AVERROR_INVALIDDATA; 579 } 580 581 s->keyframe = !(buf[0] & 1); 582 s->invisible = 0; 583 part1_size = AV_RL24(buf) >> 4; 584 585 if (buf_size < 4 - s->profile + part1_size) { 586 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size); 587 return AVERROR_INVALIDDATA; 588 } 589 590 buf += 4 - s->profile; 591 buf_size -= 4 - s->profile; 592 593 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); 594 595 ret = ff_vpx_init_range_decoder(c, buf, part1_size); 596 if (ret < 0) 597 return ret; 598 buf += part1_size; 599 buf_size -= part1_size; 600 601 /* A. 
Dimension information (keyframes only) */ 602 if (s->keyframe) { 603 width = vp89_rac_get_uint(c, 12); 604 height = vp89_rac_get_uint(c, 12); 605 hscale = vp89_rac_get_uint(c, 2); 606 vscale = vp89_rac_get_uint(c, 2); 607 if (hscale || vscale) 608 avpriv_request_sample(s->avctx, "Upscaling"); 609 610 s->update_golden = s->update_altref = VP8_FRAME_CURRENT; 611 vp78_reset_probability_tables(s); 612 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, 613 sizeof(s->prob->pred16x16)); 614 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, 615 sizeof(s->prob->pred8x8c)); 616 for (i = 0; i < 2; i++) 617 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], 618 sizeof(vp7_mv_default_prob[i])); 619 memset(&s->segmentation, 0, sizeof(s->segmentation)); 620 memset(&s->lf_delta, 0, sizeof(s->lf_delta)); 621 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); 622 } 623 624 if (s->keyframe || s->profile > 0) 625 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred)); 626 627 /* B. Decoding information for all four macroblock-level features */ 628 for (i = 0; i < 4; i++) { 629 s->feature_enabled[i] = vp89_rac_get(c); 630 if (s->feature_enabled[i]) { 631 s->feature_present_prob[i] = vp89_rac_get_uint(c, 8); 632 633 for (j = 0; j < 3; j++) 634 s->feature_index_prob[i][j] = 635 vp89_rac_get(c) ? vp89_rac_get_uint(c, 8) : 255; 636 637 if (vp7_feature_value_size[s->profile][i]) 638 for (j = 0; j < 4; j++) 639 s->feature_value[i][j] = 640 vp89_rac_get(c) ? 
vp89_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0; 641 } 642 } 643 644 s->segmentation.enabled = 0; 645 s->segmentation.update_map = 0; 646 s->lf_delta.enabled = 0; 647 648 s->num_coeff_partitions = 1; 649 ret = ff_vpx_init_range_decoder(&s->coeff_partition[0], buf, buf_size); 650 if (ret < 0) 651 return ret; 652 653 if (!s->macroblocks_base || /* first frame */ 654 width != s->avctx->width || height != s->avctx->height || 655 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) { 656 if ((ret = vp7_update_dimensions(s, width, height)) < 0) 657 return ret; 658 } 659 660 /* C. Dequantization indices */ 661 vp7_get_quants(s); 662 663 /* D. Golden frame update flag (a Flag) for interframes only */ 664 if (!s->keyframe) { 665 s->update_golden = vp89_rac_get(c) ? VP8_FRAME_CURRENT : VP8_FRAME_NONE; 666 s->sign_bias[VP8_FRAME_GOLDEN] = 0; 667 } 668 669 s->update_last = 1; 670 s->update_probabilities = 1; 671 672 if (s->profile > 0) { 673 s->update_probabilities = vp89_rac_get(c); 674 if (!s->update_probabilities) 675 s->prob[1] = s->prob[0]; 676 677 if (!s->keyframe) 678 fade_present = vp89_rac_get(c); 679 } 680 681 if (vpx_rac_is_end(c)) 682 return AVERROR_INVALIDDATA; 683 /* E. Fading information for previous frame */ 684 if (fade_present && vp89_rac_get(c)) { 685 alpha = (int8_t) vp89_rac_get_uint(c, 8); 686 beta = (int8_t) vp89_rac_get_uint(c, 8); 687 } 688 689 /* F. Loop filter type */ 690 if (!s->profile) 691 s->filter.simple = vp89_rac_get(c); 692 693 /* G. DCT coefficient ordering specification */ 694 if (vp89_rac_get(c)) 695 for (i = 1; i < 16; i++) 696 s->prob[0].scan[i] = ff_zigzag_scan[vp89_rac_get_uint(c, 4)]; 697 698 /* H. Loop filter levels */ 699 if (s->profile > 0) 700 s->filter.simple = vp89_rac_get(c); 701 s->filter.level = vp89_rac_get_uint(c, 6); 702 s->filter.sharpness = vp89_rac_get_uint(c, 3); 703 704 /* I. 
DCT coefficient probability update; 13.3 Token Probability Updates */ 705 vp78_update_probability_tables(s); 706 707 s->mbskip_enabled = 0; 708 709 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */ 710 if (!s->keyframe) { 711 s->prob->intra = vp89_rac_get_uint(c, 8); 712 s->prob->last = vp89_rac_get_uint(c, 8); 713 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE); 714 } 715 716 if (vpx_rac_is_end(c)) 717 return AVERROR_INVALIDDATA; 718 719 if ((ret = vp7_fade_frame(s, alpha, beta)) < 0) 720 return ret; 721 722 return 0; 723 } 724 725 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 726 { 727 VPXRangeCoder *c = &s->c; 728 int header_size, hscale, vscale, ret; 729 int width = s->avctx->width; 730 int height = s->avctx->height; 731 732 if (buf_size < 3) { 733 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size); 734 return AVERROR_INVALIDDATA; 735 } 736 737 s->keyframe = !(buf[0] & 1); 738 s->profile = (buf[0]>>1) & 7; 739 s->invisible = !(buf[0] & 0x10); 740 header_size = AV_RL24(buf) >> 5; 741 buf += 3; 742 buf_size -= 3; 743 744 s->header_partition_size = header_size; 745 746 if (s->profile > 3) 747 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); 748 749 if (!s->profile) 750 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, 751 sizeof(s->put_pixels_tab)); 752 else // profile 1-3 use bilinear, 4+ aren't defined so whatever 753 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, 754 sizeof(s->put_pixels_tab)); 755 756 if (header_size > buf_size - 7 * s->keyframe) { 757 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); 758 return AVERROR_INVALIDDATA; 759 } 760 761 if (s->keyframe) { 762 if (AV_RL24(buf) != 0x2a019d) { 763 av_log(s->avctx, AV_LOG_ERROR, 764 "Invalid start code 0x%x\n", AV_RL24(buf)); 765 return AVERROR_INVALIDDATA; 766 } 767 width = AV_RL16(buf + 3) & 0x3fff; 768 height = AV_RL16(buf + 5) 
& 0x3fff; 769 hscale = buf[4] >> 6; 770 vscale = buf[6] >> 6; 771 buf += 7; 772 buf_size -= 7; 773 774 if (hscale || vscale) 775 avpriv_request_sample(s->avctx, "Upscaling"); 776 777 s->update_golden = s->update_altref = VP8_FRAME_CURRENT; 778 vp78_reset_probability_tables(s); 779 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, 780 sizeof(s->prob->pred16x16)); 781 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, 782 sizeof(s->prob->pred8x8c)); 783 memcpy(s->prob->mvc, vp8_mv_default_prob, 784 sizeof(s->prob->mvc)); 785 memset(&s->segmentation, 0, sizeof(s->segmentation)); 786 memset(&s->lf_delta, 0, sizeof(s->lf_delta)); 787 } 788 789 ret = ff_vpx_init_range_decoder(c, buf, header_size); 790 if (ret < 0) 791 return ret; 792 buf += header_size; 793 buf_size -= header_size; 794 795 if (s->keyframe) { 796 s->colorspace = vp89_rac_get(c); 797 if (s->colorspace) 798 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); 799 s->fullrange = vp89_rac_get(c); 800 } 801 802 if ((s->segmentation.enabled = vp89_rac_get(c))) 803 parse_segment_info(s); 804 else 805 s->segmentation.update_map = 0; // FIXME: move this to some init function? 
806 807 s->filter.simple = vp89_rac_get(c); 808 s->filter.level = vp89_rac_get_uint(c, 6); 809 s->filter.sharpness = vp89_rac_get_uint(c, 3); 810 811 if ((s->lf_delta.enabled = vp89_rac_get(c))) { 812 s->lf_delta.update = vp89_rac_get(c); 813 if (s->lf_delta.update) 814 update_lf_deltas(s); 815 } 816 817 if (setup_partitions(s, buf, buf_size)) { 818 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); 819 return AVERROR_INVALIDDATA; 820 } 821 822 if (!s->macroblocks_base || /* first frame */ 823 width != s->avctx->width || height != s->avctx->height || 824 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) 825 if ((ret = vp8_update_dimensions(s, width, height)) < 0) 826 return ret; 827 828 vp8_get_quants(s); 829 830 if (!s->keyframe) { 831 update_refs(s); 832 s->sign_bias[VP8_FRAME_GOLDEN] = vp89_rac_get(c); 833 s->sign_bias[VP8_FRAME_ALTREF] = vp89_rac_get(c); 834 } 835 836 // if we aren't saving this frame's probabilities for future frames, 837 // make a copy of the current probabilities 838 if (!(s->update_probabilities = vp89_rac_get(c))) 839 s->prob[1] = s->prob[0]; 840 841 s->update_last = s->keyframe || vp89_rac_get(c); 842 843 vp78_update_probability_tables(s); 844 845 if ((s->mbskip_enabled = vp89_rac_get(c))) 846 s->prob->mbskip = vp89_rac_get_uint(c, 8); 847 848 if (!s->keyframe) { 849 s->prob->intra = vp89_rac_get_uint(c, 8); 850 s->prob->last = vp89_rac_get_uint(c, 8); 851 s->prob->golden = vp89_rac_get_uint(c, 8); 852 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE); 853 } 854 855 // Record the entropy coder state here so that hwaccels can use it. 
856 s->c.code_word = vpx_rac_renorm(&s->c); 857 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8); 858 s->coder_state_at_header_end.range = s->c.high; 859 s->coder_state_at_header_end.value = s->c.code_word >> 16; 860 s->coder_state_at_header_end.bit_count = -s->c.bits % 8; 861 862 return 0; 863 } 864 865 static av_always_inline 866 void clamp_mv(const VP8mvbounds *s, VP8mv *dst, const VP8mv *src) 867 { 868 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), 869 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); 870 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX), 871 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX)); 872 } 873 874 /** 875 * Motion vector coding, 17.1. 876 */ 877 static av_always_inline int read_mv_component(VPXRangeCoder *c, const uint8_t *p, int vp7) 878 { 879 int bit, x = 0; 880 881 if (vpx_rac_get_prob_branchy(c, p[0])) { 882 int i; 883 884 for (i = 0; i < 3; i++) 885 x += vpx_rac_get_prob(c, p[9 + i]) << i; 886 for (i = (vp7 ? 7 : 9); i > 3; i--) 887 x += vpx_rac_get_prob(c, p[9 + i]) << i; 888 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vpx_rac_get_prob(c, p[12])) 889 x += 8; 890 } else { 891 // small_mvtree 892 const uint8_t *ps = p + 2; 893 bit = vpx_rac_get_prob(c, *ps); 894 ps += 1 + 3 * bit; 895 x += 4 * bit; 896 bit = vpx_rac_get_prob(c, *ps); 897 ps += 1 + bit; 898 x += 2 * bit; 899 x += vpx_rac_get_prob(c, *ps); 900 } 901 902 return (x && vpx_rac_get_prob(c, p[1])) ? 
           -x : x;
}

static int vp7_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VPXRangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

// Choose the sub-MV probability set from the left/top neighbouring
// sub-MVs; VP7 always uses a single fixed table.
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(const VP8Context *s, VPXRangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    const VP8Macroblock *top_mb;
    const VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    const VP8mv *top_mv;
    const VP8mv *left_mv = left_mb->bmv;
    const VP8mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    // partitioning mode: 16x8 / 8x16 / 8x8 / 4x4, coded as a small tree
    if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vpx_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vpx_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k
              & 3))
            // leftmost column of the 4x4 grid: take the neighbour MB's MVs
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            // top row of the 4x4 grid: take the MB above
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vpx_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vpx_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vpx_rac_get_prob_branchy(c, submv_prob[2])) {
                    // explicit sub-MV, coded as a delta from the MB MV
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    // zero sub-MV
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                // copy the sub-MV from above
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            // copy the sub-MV from the left
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    // vwidth includes the padding column
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    // reject offsets before 'boundary' or landing on the padding column
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

// Return the sub-block MV for split-MV macroblocks, the whole-MB MV
// (bmv[0]) otherwise.
static const VP8mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ?
/**
 * Decode the MV mode and motion vector(s) for one inter macroblock (VP7).
 *
 * First builds up to two spatial candidates ("nearest" and "near") by
 * scanning the VP7_MV_PRED_COUNT predictor positions and accumulating a
 * score for each candidate (and for the zero vector); the scores then
 * select the probability contexts used to read the MV mode from the
 * bitstream.
 */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP8mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VPXRangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* collect candidate MVs from the predictor positions; each predictor
     * that matches an existing candidate (or zero) adds its score to the
     * corresponding counter */
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            /* locate the predictor macroblock in whichever MB layout
             * (single-thread vs. threaded) is in use */
            const VP8Macroblock *edge = (s->mb_layout == 1)
                                      ? s->macroblocks_base + 1 + edge_x +
                                        (s->mb_width + 1) * (edge_y + 1)
                                      : s->macroblocks + edge_x +
                                        (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        /* a third distinct non-zero MV contributes nothing */
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            /* illegal predictor position counts towards the zero vector */
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    /* read the MV mode; each decision's probability is conditioned on the
     * score of the relevant candidate */
    if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* new MV: start from the better-scoring candidate (or zero) */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vpx_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    /* explicit MV: decode a delta on top of the base MV */
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
(mv) { \ 1159 if (cur_sign_bias != sign_bias[edge_ref]) { \ 1160 /* SWAR negate of the values in mv. */ \ 1161 mv = ~mv; \ 1162 mv = ((mv & 0x7fff7fff) + \ 1163 0x00010001) ^ (mv & 0x80008000); \ 1164 } \ 1165 if (!n || mv != AV_RN32A(&near_mv[idx])) \ 1166 AV_WN32A(&near_mv[++idx], mv); \ 1167 cnt[idx] += 1 + (n != 2); \ 1168 } else \ 1169 cnt[CNT_ZERO] += 1 + (n != 2); \ 1170 } \ 1171 } 1172 1173 MV_EDGE_CHECK(0) 1174 MV_EDGE_CHECK(1) 1175 MV_EDGE_CHECK(2) 1176 1177 mb->partitioning = VP8_SPLITMVMODE_NONE; 1178 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { 1179 mb->mode = VP8_MVMODE_MV; 1180 1181 /* If we have three distinct MVs, merge first and last if they're the same */ 1182 if (cnt[CNT_SPLITMV] && 1183 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) 1184 cnt[CNT_NEAREST] += 1; 1185 1186 /* Swap near and nearest if necessary */ 1187 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { 1188 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 1189 FFSWAP(VP8mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 1190 } 1191 1192 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { 1193 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { 1194 /* Choose the best mv out of 0,0 and the nearest mv */ 1195 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); 1196 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + 1197 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + 1198 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); 1199 1200 if (vpx_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { 1201 mb->mode = VP8_MVMODE_SPLIT; 1202 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1]; 1203 } else { 1204 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]); 1205 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]); 1206 mb->bmv[0] = mb->mv; 1207 } 1208 } else { 1209 clamp_mv(mv_bounds, &mb->mv, 
/**
 * Decode per-macroblock mode information from the header partition:
 * segment id, skip flag, then either the intra prediction modes or the
 * reference frame plus motion vectors.
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, const VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, const uint8_t *ref, int layout, int is_vp7)
{
    VPXRangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        /* VP7 per-MB features are parsed to keep the bitstream in sync,
         * but only reported via a warning — they are not applied */
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vpx_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp89_rac_get_tree(c, vp7_feature_index_tree,
                                                  s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        /* segment id coded as a 2-bit tree (values 0-3) */
        int bit = vpx_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vpx_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        /* no map update this frame: reuse the previous frame's id if any */
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vpx_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                     vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* whole-MB mode: replicate the implied 4x4 mode into the
             * top/left context so neighbouring I4x4 MBs predict correctly */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
                                                 vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP8_FRAME_CURRENT;
    } else if (vpx_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vpx_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vpx_rac_get_prob(c, s->prob->golden)) ? VP8_FRAME_ALTREF
                                                                  : VP8_FRAME_GOLDEN;
        else
            mb->ref_frame = VP8_FRAME_PREVIOUS;
        /* usage statistics feed the prefetch heuristic */
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp89_rac_get_tree(c, vp8_pred16x16_tree_inter,
                                     s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp89_rac_get_tree(c, vp8_pred8x8c_tree,
                                                 s->prob->pred8x8c);
        mb->ref_frame = VP8_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
probs[i + 1][1]; 1378 } else { 1379 if (!vpx_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 1380 coeff = vpx_rac_get_prob_branchy(&c, token_prob[4]); 1381 if (coeff) 1382 coeff += vpx_rac_get_prob(&c, token_prob[5]); 1383 coeff += 2; 1384 } else { 1385 // DCT_CAT* 1386 if (!vpx_rac_get_prob_branchy(&c, token_prob[6])) { 1387 if (!vpx_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 1388 coeff = 5 + vpx_rac_get_prob(&c, vp8_dct_cat1_prob[0]); 1389 } else { // DCT_CAT2 1390 coeff = 7; 1391 coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; 1392 coeff += vpx_rac_get_prob(&c, vp8_dct_cat2_prob[1]); 1393 } 1394 } else { // DCT_CAT3 and up 1395 int a = vpx_rac_get_prob(&c, token_prob[8]); 1396 int b = vpx_rac_get_prob(&c, token_prob[9 + a]); 1397 int cat = (a << 1) + b; 1398 coeff = 3 + (8 << cat); 1399 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); 1400 } 1401 } 1402 token_prob = probs[i + 1][2]; 1403 } 1404 block[scan[i]] = (vp89_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; 1405 } while (++i < 16); 1406 1407 *r = c; 1408 return i; 1409 } 1410 1411 static av_always_inline 1412 int inter_predict_dc(int16_t block[16], int16_t pred[2]) 1413 { 1414 int16_t dc = block[0]; 1415 int ret = 0; 1416 1417 if (pred[1] > 3) { 1418 dc += pred[0]; 1419 ret = 1; 1420 } 1421 1422 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) { 1423 block[0] = pred[0] = dc; 1424 pred[1] = 0; 1425 } else { 1426 if (pred[0] == dc) 1427 pred[1]++; 1428 block[0] = pred[0] = dc; 1429 } 1430 1431 return ret; 1432 } 1433 1434 static int vp7_decode_block_coeffs_internal(VPXRangeCoder *r, 1435 int16_t block[16], 1436 uint8_t probs[16][3][NUM_DCT_TOKENS - 1], 1437 int i, const uint8_t *token_prob, 1438 const int16_t qmul[2], 1439 const uint8_t scan[16]) 1440 { 1441 return decode_block_coeffs_internal(r, block, probs, i, 1442 token_prob, qmul, scan, IS_VP7); 1443 } 1444 1445 #ifndef vp8_decode_block_coeffs_internal 1446 static int 
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VPXRangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, const int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    // probability set for the first token depends on how many of the
    // left/top neighbour blocks were entirely zero
    const uint8_t *token_prob = probs[i][zero_nhood];
    // fast path: consume the end-of-block token here so the heavier
    // decode loop is only entered for blocks with at least one coefficient
    if (!vpx_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
/**
 * Save the bottom pixel row of a macroblock (16 luma bytes and, unless the
 * simple loop filter is in use, 8+8 chroma bytes) into the top-border
 * scratch buffer, so it survives in-place deblocking and can be used by
 * the macroblock row below.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, const uint8_t *src_y,
                      const uint8_t *src_cb, const uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        // chroma border is only needed for the normal loop filter
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
// or to initialize the top row to 127 1598 if (!simple || !mb_y) { 1599 XCHG(top_border_m1 + 16, src_cb - 8, xchg); 1600 XCHG(top_border_m1 + 24, src_cr - 8, xchg); 1601 XCHG(top_border + 16, src_cb, 1); 1602 XCHG(top_border + 24, src_cr, 1); 1603 } 1604 } 1605 1606 static av_always_inline 1607 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) 1608 { 1609 if (!mb_x) 1610 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 1611 else 1612 return mb_y ? mode : LEFT_DC_PRED8x8; 1613 } 1614 1615 static av_always_inline 1616 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7) 1617 { 1618 if (!mb_x) 1619 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8); 1620 else 1621 return mb_y ? mode : HOR_PRED8x8; 1622 } 1623 1624 static av_always_inline 1625 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7) 1626 { 1627 switch (mode) { 1628 case DC_PRED8x8: 1629 return check_dc_pred8x8_mode(mode, mb_x, mb_y); 1630 case VERT_PRED8x8: 1631 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode; 1632 case HOR_PRED8x8: 1633 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode; 1634 case PLANE_PRED8x8: /* TM */ 1635 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7); 1636 } 1637 return mode; 1638 } 1639 1640 static av_always_inline 1641 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7) 1642 { 1643 if (!mb_x) { 1644 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED); 1645 } else { 1646 return mb_y ? mode : HOR_VP8_PRED; 1647 } 1648 } 1649 1650 static av_always_inline 1651 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, 1652 int *copy_buf, int vp7) 1653 { 1654 switch (mode) { 1655 case VERT_PRED: 1656 if (!mb_x && mb_y) { 1657 *copy_buf = 1; 1658 return mode; 1659 } 1660 /* fall-through */ 1661 case DIAG_DOWN_LEFT_PRED: 1662 case VERT_LEFT_PRED: 1663 return !mb_y ? (vp7 ? 
/**
 * Perform intra prediction for one macroblock (luma and chroma) and, for
 * I4x4 luma blocks, add the IDCT residual per 4x4 block. Frame-edge cases
 * are handled either by substituting an edge-safe prediction mode or by
 * predicting into a small padded copy buffer.
 */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127 otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        /* whole-MB 16x16 prediction */
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        const uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* off-frame fill values: VP7 uses 128 everywhere, VP8 uses 127
         * above and 129 to the left */
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        const uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        const uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            const uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                /* 5 rows x 8 bytes: a 4x4 block at offset 12 surrounded by
                 * a guard row above and a guard column to the left */
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    /* predict into the padded copy buffer; fill its borders
                     * from the neighbouring pixels or the edge constants */
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    /* copy the predicted 4x4 block back into the frame */
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                /* add residual right away so later blocks predict from
                 * reconstructed pixels */
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 const ProgressFrame *ref, const VP8mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    const uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        /* luma MVs are quarter-pel; scaling by 2 expresses the subpel phase
         * in the same eighth-pel units the chroma path (and subpel_idx)
         * uses — presumably for table sharing, confirm against chroma MC */
        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        /* wait until the reference frame has decoded the rows this block
         * (plus filter taps) reads; progress is in macroblock rows (>> 4) */
        ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* source area straddles the plane edge: build a padded copy */
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        /* zero MV: plain copy, no subpel filtering needed */
        ff_progress_frame_await(ref, (3 + y_off + block_h) >> 4);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
3); 1902 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || 1903 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { 1904 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 1905 src1 - my_idx * linesize - mx_idx, 1906 EDGE_EMU_LINESIZE, linesize, 1907 block_w + subpel_idx[1][mx], 1908 block_h + subpel_idx[1][my], 1909 x_off - mx_idx, y_off - my_idx, width, height); 1910 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; 1911 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my); 1912 1913 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 1914 src2 - my_idx * linesize - mx_idx, 1915 EDGE_EMU_LINESIZE, linesize, 1916 block_w + subpel_idx[1][mx], 1917 block_h + subpel_idx[1][my], 1918 x_off - mx_idx, y_off - my_idx, width, height); 1919 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; 1920 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); 1921 } else { 1922 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); 1923 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); 1924 } 1925 } else { 1926 ff_progress_frame_await(ref, (3 + y_off + block_h) >> 3); 1927 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1928 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1929 } 1930 } 1931 1932 static av_always_inline 1933 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3], 1934 const ProgressFrame *ref_frame, int x_off, int y_off, 1935 int bx_off, int by_off, int block_w, int block_h, 1936 int width, int height, const VP8mv *mv) 1937 { 1938 VP8mv uvmv = *mv; 1939 1940 /* Y */ 1941 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off, 1942 ref_frame, mv, x_off + bx_off, y_off + by_off, 1943 block_w, block_h, width, height, s->linesize, 1944 s->put_pixels_tab[block_w == 8]); 1945 1946 /* U/V */ 1947 if (s->profile == 3) { 
1948 /* this block only applies VP8; it is safe to check 1949 * only the profile, as VP7 profile <= 1 */ 1950 uvmv.x &= ~7; 1951 uvmv.y &= ~7; 1952 } 1953 x_off >>= 1; 1954 y_off >>= 1; 1955 bx_off >>= 1; 1956 by_off >>= 1; 1957 width >>= 1; 1958 height >>= 1; 1959 block_w >>= 1; 1960 block_h >>= 1; 1961 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, 1962 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, 1963 &uvmv, x_off + bx_off, y_off + by_off, 1964 block_w, block_h, width, height, s->uvlinesize, 1965 s->put_pixels_tab[1 + (block_w == 4)]); 1966 } 1967 1968 /* Fetch pixels for estimated mv 4 macroblocks ahead. 1969 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ 1970 static av_always_inline 1971 void prefetch_motion(const VP8Context *s, const VP8Macroblock *mb, 1972 int mb_x, int mb_y, int mb_xy, int ref) 1973 { 1974 /* Don't prefetch refs that haven't been used very often this frame. */ 1975 if (s->ref_count[ref - 1] > (mb_xy >> 5)) { 1976 int x_off = mb_x << 4, y_off = mb_y << 4; 1977 int mx = (mb->mv.x >> 2) + x_off + 8; 1978 int my = (mb->mv.y >> 2) + y_off; 1979 uint8_t **src = s->framep[ref]->tf.f->data; 1980 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; 1981 /* For threading, a ff_thread_await_progress here might be useful, but 1982 * it actually slows down the decoder. Since a bad prefetch doesn't 1983 * generate bad decoder output, we don't run it here. */ 1984 s->vdsp.prefetch(src[0] + off, s->linesize, 4); 1985 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64; 1986 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); 1987 } 1988 } 1989 1990 /** 1991 * Apply motion vectors to prediction buffer, chapter 18. 
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the macroblock partitioning: one 16x16 block, two 16x8 or
 * 8x16 halves, four 8x8 quarters, or sixteen 4x4 blocks each with its own
 * motion vector.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    const ProgressFrame *ref = &s->framep[mb->ref_frame]->tf;
    const VP8mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP8mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                /* chroma MV is the rounded-to-nearest average of the four
                 * covering luma MVs (FF_SIGNBIT implements round-half-away
                 * for negative sums) */
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                /* VP8 profile 3: full-pel chroma MVs only */
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td,
                              dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
4 * x, 2123 td->block[4 + ch][(y << 1) + x], 2124 s->uvlinesize); 2125 nnz4 >>= 8; 2126 if (!nnz4) 2127 goto chroma_idct_end; 2128 } 2129 ch_dst += 4 * s->uvlinesize; 2130 } 2131 } else { 2132 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize); 2133 } 2134 } 2135 chroma_idct_end: 2136 ; 2137 } 2138 } 2139 2140 static av_always_inline 2141 void filter_level_for_mb(const VP8Context *s, const VP8Macroblock *mb, 2142 VP8FilterStrength *f, int is_vp7) 2143 { 2144 int interior_limit, filter_level; 2145 2146 if (s->segmentation.enabled) { 2147 filter_level = s->segmentation.filter_level[mb->segment]; 2148 if (!s->segmentation.absolute_vals) 2149 filter_level += s->filter.level; 2150 } else 2151 filter_level = s->filter.level; 2152 2153 if (s->lf_delta.enabled) { 2154 filter_level += s->lf_delta.ref[mb->ref_frame]; 2155 filter_level += s->lf_delta.mode[mb->mode]; 2156 } 2157 2158 filter_level = av_clip_uintp2(filter_level, 6); 2159 2160 interior_limit = filter_level; 2161 if (s->filter.sharpness) { 2162 interior_limit >>= (s->filter.sharpness + 3) >> 2; 2163 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); 2164 } 2165 interior_limit = FFMAX(interior_limit, 1); 2166 2167 f->filter_level = filter_level; 2168 f->inner_limit = interior_limit; 2169 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 || 2170 mb->mode == VP8_MVMODE_SPLIT; 2171 } 2172 2173 static av_always_inline 2174 void filter_mb(const VP8Context *s, uint8_t *const dst[3], const VP8FilterStrength *f, 2175 int mb_x, int mb_y, int is_vp7) 2176 { 2177 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh; 2178 int filter_level = f->filter_level; 2179 int inner_limit = f->inner_limit; 2180 int inner_filter = f->inner_filter; 2181 ptrdiff_t linesize = s->linesize; 2182 ptrdiff_t uvlinesize = s->uvlinesize; 2183 static const uint8_t hev_thresh_lut[2][64] = { 2184 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2185 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2186 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2187 3, 3, 3, 3 }, 2188 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2189 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2190 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2191 2, 2, 2, 2 } 2192 }; 2193 2194 if (!filter_level) 2195 return; 2196 2197 if (is_vp7) { 2198 bedge_lim_y = filter_level; 2199 bedge_lim_uv = filter_level * 2; 2200 mbedge_lim = filter_level + 2; 2201 } else { 2202 bedge_lim_y = 2203 bedge_lim_uv = filter_level * 2 + inner_limit; 2204 mbedge_lim = bedge_lim_y + 4; 2205 } 2206 2207 hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; 2208 2209 if (mb_x) { 2210 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, 2211 mbedge_lim, inner_limit, hev_thresh); 2212 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, 2213 mbedge_lim, inner_limit, hev_thresh); 2214 } 2215 2216 #define H_LOOP_FILTER_16Y_INNER(cond) \ 2217 if (cond && inner_filter) { \ 2218 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \ 2219 bedge_lim_y, inner_limit, \ 2220 hev_thresh); \ 2221 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \ 2222 bedge_lim_y, inner_limit, \ 2223 hev_thresh); \ 2224 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \ 2225 bedge_lim_y, inner_limit, \ 2226 hev_thresh); \ 2227 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \ 2228 uvlinesize, bedge_lim_uv, \ 2229 inner_limit, hev_thresh); \ 2230 } 2231 2232 H_LOOP_FILTER_16Y_INNER(!is_vp7) 2233 2234 if (mb_y) { 2235 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, 2236 mbedge_lim, inner_limit, hev_thresh); 2237 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, 2238 mbedge_lim, inner_limit, hev_thresh); 2239 } 2240 2241 if (inner_filter) { 2242 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize, 2243 linesize, bedge_lim_y, 2244 inner_limit, hev_thresh); 2245 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * 
linesize, 2246 linesize, bedge_lim_y, 2247 inner_limit, hev_thresh); 2248 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize, 2249 linesize, bedge_lim_y, 2250 inner_limit, hev_thresh); 2251 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, 2252 dst[2] + 4 * uvlinesize, 2253 uvlinesize, bedge_lim_uv, 2254 inner_limit, hev_thresh); 2255 } 2256 2257 H_LOOP_FILTER_16Y_INNER(is_vp7) 2258 } 2259 2260 static av_always_inline 2261 void filter_mb_simple(const VP8Context *s, uint8_t *dst, const VP8FilterStrength *f, 2262 int mb_x, int mb_y) 2263 { 2264 int mbedge_lim, bedge_lim; 2265 int filter_level = f->filter_level; 2266 int inner_limit = f->inner_limit; 2267 int inner_filter = f->inner_filter; 2268 ptrdiff_t linesize = s->linesize; 2269 2270 if (!filter_level) 2271 return; 2272 2273 bedge_lim = 2 * filter_level + inner_limit; 2274 mbedge_lim = bedge_lim + 4; 2275 2276 if (mb_x) 2277 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); 2278 if (inner_filter) { 2279 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); 2280 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); 2281 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); 2282 } 2283 2284 if (mb_y) 2285 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); 2286 if (inner_filter) { 2287 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); 2288 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); 2289 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); 2290 } 2291 } 2292 2293 #define MARGIN (16 << 2) 2294 static av_always_inline 2295 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, 2296 const VP8Frame *prev_frame, int is_vp7) 2297 { 2298 VP8Context *s = avctx->priv_data; 2299 int mb_x, mb_y; 2300 2301 s->mv_bounds.mv_min.y = -MARGIN; 2302 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; 2303 for (mb_y = 0; mb_y < s->mb_height; 
mb_y++) { 2304 VP8Macroblock *mb = s->macroblocks_base + 2305 ((s->mb_width + 1) * (mb_y + 1) + 1); 2306 int mb_xy = mb_y * s->mb_width; 2307 2308 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); 2309 2310 s->mv_bounds.mv_min.x = -MARGIN; 2311 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; 2312 2313 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 2314 if (vpx_rac_is_end(&s->c)) { 2315 return AVERROR_INVALIDDATA; 2316 } 2317 if (mb_y == 0) 2318 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, 2319 DC_PRED * 0x01010101); 2320 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy, 2321 prev_frame && prev_frame->seg_map ? 2322 prev_frame->seg_map + mb_xy : NULL, 1, is_vp7); 2323 s->mv_bounds.mv_min.x -= 64; 2324 s->mv_bounds.mv_max.x -= 64; 2325 } 2326 s->mv_bounds.mv_min.y -= 64; 2327 s->mv_bounds.mv_max.y -= 64; 2328 } 2329 return 0; 2330 } 2331 2332 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, 2333 const VP8Frame *prev_frame) 2334 { 2335 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7); 2336 } 2337 2338 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, 2339 const VP8Frame *prev_frame) 2340 { 2341 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8); 2342 } 2343 2344 #if HAVE_THREADS 2345 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \ 2346 do { \ 2347 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \ 2348 if (atomic_load(&otd->thread_mb_pos) < tmp) { \ 2349 pthread_mutex_lock(&otd->lock); \ 2350 atomic_store(&td->wait_mb_pos, tmp); \ 2351 do { \ 2352 if (atomic_load(&otd->thread_mb_pos) >= tmp) \ 2353 break; \ 2354 pthread_cond_wait(&otd->cond, &otd->lock); \ 2355 } while (1); \ 2356 atomic_store(&td->wait_mb_pos, INT_MAX); \ 2357 pthread_mutex_unlock(&otd->lock); \ 2358 } \ 2359 } while (0) 2360 2361 #define update_pos(td, mb_y, mb_x) \ 2362 do { \ 2363 int pos = (mb_y << 16) | (mb_x & 
0xFFFF); \ 2364 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \ 2365 (num_jobs > 1); \ 2366 int is_null = !next_td || !prev_td; \ 2367 int pos_check = (is_null) ? 1 : \ 2368 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \ 2369 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \ 2370 atomic_store(&td->thread_mb_pos, pos); \ 2371 if (sliced_threading && pos_check) { \ 2372 pthread_mutex_lock(&td->lock); \ 2373 pthread_cond_broadcast(&td->cond); \ 2374 pthread_mutex_unlock(&td->lock); \ 2375 } \ 2376 } while (0) 2377 #else 2378 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0) 2379 #define update_pos(td, mb_y, mb_x) while(0) 2380 #endif 2381 2382 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, 2383 int jobnr, int threadnr, int is_vp7) 2384 { 2385 VP8Context *s = avctx->priv_data; 2386 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; 2387 int mb_y = atomic_load(&td->thread_mb_pos) >> 16; 2388 int mb_x, mb_xy = mb_y * s->mb_width; 2389 int num_jobs = s->num_jobs; 2390 const VP8Frame *prev_frame = s->prev_frame; 2391 VP8Frame *curframe = s->curframe; 2392 VPXRangeCoder *coeff_c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)]; 2393 2394 VP8Macroblock *mb; 2395 uint8_t *dst[3] = { 2396 curframe->tf.f->data[0] + 16 * mb_y * s->linesize, 2397 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize, 2398 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize 2399 }; 2400 2401 if (vpx_rac_is_end(&s->c)) 2402 return AVERROR_INVALIDDATA; 2403 2404 if (mb_y == 0) 2405 prev_td = td; 2406 else 2407 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; 2408 if (mb_y == s->mb_height - 1) 2409 next_td = td; 2410 else 2411 next_td = &s->thread_data[(jobnr + 1) % num_jobs]; 2412 if (s->mb_layout == 1) 2413 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); 2414 else { 2415 // Make sure the previous frame has read its segmentation 
map, 2416 // if we re-use the same map. 2417 if (prev_frame && s->segmentation.enabled && 2418 !s->segmentation.update_map) 2419 ff_progress_frame_await(&prev_frame->tf, mb_y); 2420 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; 2421 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock 2422 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); 2423 } 2424 2425 if (!is_vp7 || mb_y == 0) 2426 memset(td->left_nnz, 0, sizeof(td->left_nnz)); 2427 2428 td->mv_bounds.mv_min.x = -MARGIN; 2429 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; 2430 2431 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 2432 if (vpx_rac_is_end(&s->c)) 2433 return AVERROR_INVALIDDATA; 2434 // Wait for previous thread to read mb_x+2, and reach mb_y-1. 2435 if (prev_td != td) { 2436 if (threadnr != 0) { 2437 check_thread_pos(td, prev_td, 2438 mb_x + (is_vp7 ? 2 : 1), 2439 mb_y - (is_vp7 ? 2 : 1)); 2440 } else { 2441 check_thread_pos(td, prev_td, 2442 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3, 2443 mb_y - (is_vp7 ? 2 : 1)); 2444 } 2445 } 2446 2447 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, 2448 s->linesize, 4); 2449 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, 2450 dst[2] - dst[1], 2); 2451 2452 if (!s->mb_layout) 2453 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map + mb_xy, 2454 prev_frame && prev_frame->seg_map ? 
2455 prev_frame->seg_map + mb_xy : NULL, 0, is_vp7); 2456 2457 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_PREVIOUS); 2458 2459 if (!mb->skip) { 2460 if (vpx_rac_is_end(coeff_c)) 2461 return AVERROR_INVALIDDATA; 2462 decode_mb_coeffs(s, td, coeff_c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7); 2463 } 2464 2465 if (mb->mode <= MODE_I4x4) 2466 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7); 2467 else 2468 inter_predict(s, td, dst, mb, mb_x, mb_y); 2469 2470 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_GOLDEN); 2471 2472 if (!mb->skip) { 2473 idct_mb(s, td, dst, mb); 2474 } else { 2475 AV_ZERO64(td->left_nnz); 2476 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned 2477 2478 /* Reset DC block predictors if they would exist 2479 * if the mb had coefficients */ 2480 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { 2481 td->left_nnz[8] = 0; 2482 s->top_nnz[mb_x][8] = 0; 2483 } 2484 } 2485 2486 if (s->deblock_filter) 2487 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7); 2488 2489 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) { 2490 if (s->filter.simple) 2491 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2492 NULL, NULL, s->linesize, 0, 1); 2493 else 2494 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2495 dst[1], dst[2], s->linesize, s->uvlinesize, 0); 2496 } 2497 2498 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP8_FRAME_ALTREF); 2499 2500 dst[0] += 16; 2501 dst[1] += 8; 2502 dst[2] += 8; 2503 td->mv_bounds.mv_min.x -= 64; 2504 td->mv_bounds.mv_max.x -= 64; 2505 2506 if (mb_x == s->mb_width + 1) { 2507 update_pos(td, mb_y, s->mb_width + 3); 2508 } else { 2509 update_pos(td, mb_y, mb_x); 2510 } 2511 } 2512 return 0; 2513 } 2514 2515 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, 2516 int jobnr, int threadnr) 2517 { 2518 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1); 2519 } 2520 2521 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void 
*tdata, 2522 int jobnr, int threadnr) 2523 { 2524 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0); 2525 } 2526 2527 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata, 2528 int jobnr, int threadnr, int is_vp7) 2529 { 2530 VP8Context *s = avctx->priv_data; 2531 VP8ThreadData *td = &s->thread_data[threadnr]; 2532 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs; 2533 AVFrame *curframe = s->curframe->tf.f; 2534 VP8Macroblock *mb; 2535 VP8ThreadData *prev_td, *next_td; 2536 uint8_t *dst[3] = { 2537 curframe->data[0] + 16 * mb_y * s->linesize, 2538 curframe->data[1] + 8 * mb_y * s->uvlinesize, 2539 curframe->data[2] + 8 * mb_y * s->uvlinesize 2540 }; 2541 2542 if (s->mb_layout == 1) 2543 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); 2544 else 2545 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; 2546 2547 if (mb_y == 0) 2548 prev_td = td; 2549 else 2550 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; 2551 if (mb_y == s->mb_height - 1) 2552 next_td = td; 2553 else 2554 next_td = &s->thread_data[(jobnr + 1) % num_jobs]; 2555 2556 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) { 2557 const VP8FilterStrength *f = &td->filter_strength[mb_x]; 2558 if (prev_td != td) 2559 check_thread_pos(td, prev_td, 2560 (mb_x + 1) + (s->mb_width + 3), mb_y - 1); 2561 if (next_td != td) 2562 if (next_td != &s->thread_data[0]) 2563 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1); 2564 2565 if (num_jobs == 1) { 2566 if (s->filter.simple) 2567 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2568 NULL, NULL, s->linesize, 0, 1); 2569 else 2570 backup_mb_border(s->top_border[mb_x + 1], dst[0], 2571 dst[1], dst[2], s->linesize, s->uvlinesize, 0); 2572 } 2573 2574 if (s->filter.simple) 2575 filter_mb_simple(s, dst[0], f, mb_x, mb_y); 2576 else 2577 filter_mb(s, dst, f, mb_x, mb_y, is_vp7); 2578 dst[0] += 16; 2579 dst[1] += 8; 2580 dst[2] += 8; 2581 2582 update_pos(td, mb_y, 
(s->mb_width + 3) + mb_x); 2583 } 2584 } 2585 2586 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata, 2587 int jobnr, int threadnr) 2588 { 2589 filter_mb_row(avctx, tdata, jobnr, threadnr, 1); 2590 } 2591 2592 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, 2593 int jobnr, int threadnr) 2594 { 2595 filter_mb_row(avctx, tdata, jobnr, threadnr, 0); 2596 } 2597 2598 static av_always_inline 2599 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, 2600 int threadnr, int is_vp7) 2601 { 2602 const VP8Context *s = avctx->priv_data; 2603 VP8ThreadData *td = &s->thread_data[jobnr]; 2604 VP8ThreadData *next_td = NULL, *prev_td = NULL; 2605 VP8Frame *curframe = s->curframe; 2606 int mb_y, num_jobs = s->num_jobs; 2607 int ret; 2608 2609 td->thread_nr = threadnr; 2610 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; 2611 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; 2612 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { 2613 atomic_store(&td->thread_mb_pos, mb_y << 16); 2614 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); 2615 if (ret < 0) { 2616 update_pos(td, s->mb_height, INT_MAX & 0xFFFF); 2617 return ret; 2618 } 2619 if (s->deblock_filter) 2620 s->filter_mb_row(avctx, tdata, jobnr, threadnr); 2621 update_pos(td, mb_y, INT_MAX & 0xFFFF); 2622 2623 td->mv_bounds.mv_min.y -= 64 * num_jobs; 2624 td->mv_bounds.mv_max.y -= 64 * num_jobs; 2625 2626 if (avctx->active_thread_type == FF_THREAD_FRAME) 2627 ff_progress_frame_report(&curframe->tf, mb_y); 2628 } 2629 2630 return 0; 2631 } 2632 2633 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, 2634 int jobnr, int threadnr) 2635 { 2636 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7); 2637 } 2638 2639 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, 2640 int jobnr, int threadnr) 2641 { 2642 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, 
IS_VP8); 2643 } 2644 2645 static av_always_inline 2646 int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame, 2647 const AVPacket *avpkt, int is_vp7) 2648 { 2649 VP8Context *s = avctx->priv_data; 2650 int ret, i, referenced, num_jobs; 2651 enum AVDiscard skip_thresh; 2652 VP8Frame *av_uninit(curframe), *prev_frame; 2653 2654 if (is_vp7) 2655 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size); 2656 else 2657 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size); 2658 2659 if (ret < 0) 2660 goto err; 2661 2662 if (!is_vp7 && s->actually_webp) { 2663 // VP8 in WebP is supposed to be intra-only. Enforce this here 2664 // to ensure that output is reproducible with frame-threading. 2665 if (!s->keyframe) 2666 return AVERROR_INVALIDDATA; 2667 // avctx->pix_fmt already set in caller. 2668 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { 2669 s->pix_fmt = get_pixel_format(s); 2670 if (s->pix_fmt < 0) { 2671 ret = AVERROR(EINVAL); 2672 goto err; 2673 } 2674 avctx->pix_fmt = s->pix_fmt; 2675 } 2676 2677 prev_frame = s->framep[VP8_FRAME_CURRENT]; 2678 2679 referenced = s->update_last || s->update_golden == VP8_FRAME_CURRENT || 2680 s->update_altref == VP8_FRAME_CURRENT; 2681 2682 skip_thresh = !referenced ? AVDISCARD_NONREF 2683 : !s->keyframe ? 
AVDISCARD_NONKEY 2684 : AVDISCARD_ALL; 2685 2686 if (avctx->skip_frame >= skip_thresh) { 2687 s->invisible = 1; 2688 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); 2689 goto skip_decode; 2690 } 2691 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; 2692 2693 // release no longer referenced frames 2694 for (i = 0; i < 5; i++) 2695 if (s->frames[i].tf.f && 2696 &s->frames[i] != prev_frame && 2697 &s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] && 2698 &s->frames[i] != s->framep[VP8_FRAME_GOLDEN] && 2699 &s->frames[i] != s->framep[VP8_FRAME_ALTREF]) 2700 vp8_release_frame(&s->frames[i]); 2701 2702 curframe = s->framep[VP8_FRAME_CURRENT] = vp8_find_free_buffer(s); 2703 2704 if (!s->colorspace) 2705 avctx->colorspace = AVCOL_SPC_BT470BG; 2706 if (s->fullrange) 2707 avctx->color_range = AVCOL_RANGE_JPEG; 2708 else 2709 avctx->color_range = AVCOL_RANGE_MPEG; 2710 2711 /* Given that arithmetic probabilities are updated every frame, it's quite 2712 * likely that the values we have on a random interframe are complete 2713 * junk if we didn't start decode on a keyframe. So just don't display 2714 * anything rather than junk. */ 2715 if (!s->keyframe && (!s->framep[VP8_FRAME_PREVIOUS] || 2716 !s->framep[VP8_FRAME_GOLDEN] || 2717 !s->framep[VP8_FRAME_ALTREF])) { 2718 av_log(avctx, AV_LOG_WARNING, 2719 "Discarding interframe without a prior keyframe!\n"); 2720 ret = AVERROR_INVALIDDATA; 2721 goto err; 2722 } 2723 2724 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0) 2725 goto err; 2726 if (s->keyframe) 2727 curframe->tf.f->flags |= AV_FRAME_FLAG_KEY; 2728 else 2729 curframe->tf.f->flags &= ~AV_FRAME_FLAG_KEY; 2730 curframe->tf.f->pict_type = s->keyframe ? 
AV_PICTURE_TYPE_I 2731 : AV_PICTURE_TYPE_P; 2732 2733 // check if golden and altref are swapped 2734 if (s->update_altref != VP8_FRAME_NONE) 2735 s->next_framep[VP8_FRAME_ALTREF] = s->framep[s->update_altref]; 2736 else 2737 s->next_framep[VP8_FRAME_ALTREF] = s->framep[VP8_FRAME_ALTREF]; 2738 2739 if (s->update_golden != VP8_FRAME_NONE) 2740 s->next_framep[VP8_FRAME_GOLDEN] = s->framep[s->update_golden]; 2741 else 2742 s->next_framep[VP8_FRAME_GOLDEN] = s->framep[VP8_FRAME_GOLDEN]; 2743 2744 if (s->update_last) 2745 s->next_framep[VP8_FRAME_PREVIOUS] = curframe; 2746 else 2747 s->next_framep[VP8_FRAME_PREVIOUS] = s->framep[VP8_FRAME_PREVIOUS]; 2748 2749 s->next_framep[VP8_FRAME_CURRENT] = curframe; 2750 2751 if (!is_vp7 && !s->actually_webp) 2752 ff_thread_finish_setup(avctx); 2753 2754 if (avctx->hwaccel) { 2755 const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel); 2756 ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); 2757 if (ret < 0) 2758 goto err; 2759 2760 ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); 2761 if (ret < 0) 2762 goto err; 2763 2764 ret = hwaccel->end_frame(avctx); 2765 if (ret < 0) 2766 goto err; 2767 2768 } else { 2769 s->linesize = curframe->tf.f->linesize[0]; 2770 s->uvlinesize = curframe->tf.f->linesize[1]; 2771 2772 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); 2773 /* Zero macroblock structures for top/top-left prediction 2774 * from outside the frame. */ 2775 if (!s->mb_layout) 2776 memset(s->macroblocks + s->mb_height * 2 - 1, 0, 2777 (s->mb_width + 1) * sizeof(*s->macroblocks)); 2778 if (!s->mb_layout && s->keyframe) 2779 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); 2780 2781 memset(s->ref_count, 0, sizeof(s->ref_count)); 2782 2783 if (s->mb_layout == 1) { 2784 // Make sure the previous frame has read its segmentation map, 2785 // if we re-use the same map. 
2786 if (prev_frame && s->segmentation.enabled && 2787 !s->segmentation.update_map) 2788 ff_progress_frame_await(&prev_frame->tf, 1); 2789 if (is_vp7) 2790 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); 2791 else 2792 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); 2793 if (ret < 0) 2794 goto err; 2795 } 2796 2797 if (avctx->active_thread_type == FF_THREAD_FRAME) 2798 num_jobs = 1; 2799 else 2800 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); 2801 s->num_jobs = num_jobs; 2802 s->curframe = curframe; 2803 s->prev_frame = prev_frame; 2804 s->mv_bounds.mv_min.y = -MARGIN; 2805 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; 2806 for (i = 0; i < MAX_THREADS; i++) { 2807 VP8ThreadData *td = &s->thread_data[i]; 2808 atomic_init(&td->thread_mb_pos, 0); 2809 atomic_init(&td->wait_mb_pos, INT_MAX); 2810 } 2811 if (is_vp7) 2812 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, 2813 num_jobs); 2814 else 2815 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, 2816 num_jobs); 2817 } 2818 2819 ff_progress_frame_report(&curframe->tf, INT_MAX); 2820 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); 2821 2822 skip_decode: 2823 // if future frames don't use the updated probabilities, 2824 // reset them to the values we saved 2825 if (!s->update_probabilities) 2826 s->prob[0] = s->prob[1]; 2827 2828 if (!s->invisible) { 2829 if ((ret = av_frame_ref(rframe, curframe->tf.f)) < 0) 2830 return ret; 2831 *got_frame = 1; 2832 } 2833 2834 return avpkt->size; 2835 err: 2836 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4); 2837 return ret; 2838 } 2839 2840 int ff_vp8_decode_frame(AVCodecContext *avctx, AVFrame *frame, 2841 int *got_frame, AVPacket *avpkt) 2842 { 2843 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP8); 2844 } 2845 2846 #if CONFIG_VP7_DECODER 2847 static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame, 2848 int *got_frame, 
AVPacket *avpkt) 2849 { 2850 return vp78_decode_frame(avctx, frame, got_frame, avpkt, IS_VP7); 2851 } 2852 #endif /* CONFIG_VP7_DECODER */ 2853 2854 av_cold int ff_vp8_decode_free(AVCodecContext *avctx) 2855 { 2856 vp8_decode_flush_impl(avctx, 1); 2857 2858 return 0; 2859 } 2860 2861 static av_always_inline 2862 int vp78_decode_init(AVCodecContext *avctx, int is_vp7) 2863 { 2864 VP8Context *s = avctx->priv_data; 2865 2866 s->avctx = avctx; 2867 s->pix_fmt = AV_PIX_FMT_NONE; 2868 avctx->pix_fmt = AV_PIX_FMT_YUV420P; 2869 2870 ff_videodsp_init(&s->vdsp, 8); 2871 2872 ff_vp78dsp_init(&s->vp8dsp); 2873 if (CONFIG_VP7_DECODER && is_vp7) { 2874 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1); 2875 ff_vp7dsp_init(&s->vp8dsp); 2876 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter; 2877 s->filter_mb_row = vp7_filter_mb_row; 2878 } else if (CONFIG_VP8_DECODER && !is_vp7) { 2879 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); 2880 ff_vp8dsp_init(&s->vp8dsp); 2881 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter; 2882 s->filter_mb_row = vp8_filter_mb_row; 2883 } 2884 2885 /* does not change for VP8 */ 2886 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan)); 2887 2888 return 0; 2889 } 2890 2891 #if CONFIG_VP7_DECODER 2892 static int vp7_decode_init(AVCodecContext *avctx) 2893 { 2894 return vp78_decode_init(avctx, IS_VP7); 2895 } 2896 #endif /* CONFIG_VP7_DECODER */ 2897 2898 av_cold int ff_vp8_decode_init(AVCodecContext *avctx) 2899 { 2900 return vp78_decode_init(avctx, IS_VP8); 2901 } 2902 2903 #if CONFIG_VP8_DECODER 2904 #if HAVE_THREADS 2905 static void vp8_replace_frame(VP8Frame *dst, const VP8Frame *src) 2906 { 2907 ff_progress_frame_replace(&dst->tf, &src->tf); 2908 av_refstruct_replace(&dst->seg_map, src->seg_map); 2909 av_refstruct_replace(&dst->hwaccel_picture_private, 2910 src->hwaccel_picture_private); 2911 } 2912 2913 #define REBASE(pic) ((pic) ? 
(pic) - &s_src->frames[0] + &s->frames[0] : NULL) 2914 2915 static int vp8_decode_update_thread_context(AVCodecContext *dst, 2916 const AVCodecContext *src) 2917 { 2918 VP8Context *s = dst->priv_data, *s_src = src->priv_data; 2919 2920 if (s->macroblocks_base && 2921 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { 2922 free_buffers(s); 2923 s->mb_width = s_src->mb_width; 2924 s->mb_height = s_src->mb_height; 2925 } 2926 2927 s->pix_fmt = s_src->pix_fmt; 2928 s->prob[0] = s_src->prob[!s_src->update_probabilities]; 2929 s->segmentation = s_src->segmentation; 2930 s->lf_delta = s_src->lf_delta; 2931 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); 2932 2933 for (int i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) 2934 vp8_replace_frame(&s->frames[i], &s_src->frames[i]); 2935 2936 s->framep[0] = REBASE(s_src->next_framep[0]); 2937 s->framep[1] = REBASE(s_src->next_framep[1]); 2938 s->framep[2] = REBASE(s_src->next_framep[2]); 2939 s->framep[3] = REBASE(s_src->next_framep[3]); 2940 2941 return 0; 2942 } 2943 #endif /* HAVE_THREADS */ 2944 #endif /* CONFIG_VP8_DECODER */ 2945 2946 #if CONFIG_VP7_DECODER 2947 const FFCodec ff_vp7_decoder = { 2948 .p.name = "vp7", 2949 CODEC_LONG_NAME("On2 VP7"), 2950 .p.type = AVMEDIA_TYPE_VIDEO, 2951 .p.id = AV_CODEC_ID_VP7, 2952 .priv_data_size = sizeof(VP8Context), 2953 .init = vp7_decode_init, 2954 .close = ff_vp8_decode_free, 2955 FF_CODEC_DECODE_CB(vp7_decode_frame), 2956 .p.capabilities = AV_CODEC_CAP_DR1, 2957 .flush = vp8_decode_flush, 2958 .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES, 2959 }; 2960 #endif /* CONFIG_VP7_DECODER */ 2961 2962 #if CONFIG_VP8_DECODER 2963 const FFCodec ff_vp8_decoder = { 2964 .p.name = "vp8", 2965 CODEC_LONG_NAME("On2 VP8"), 2966 .p.type = AVMEDIA_TYPE_VIDEO, 2967 .p.id = AV_CODEC_ID_VP8, 2968 .priv_data_size = sizeof(VP8Context), 2969 .init = ff_vp8_decode_init, 2970 .close = ff_vp8_decode_free, 2971 FF_CODEC_DECODE_CB(ff_vp8_decode_frame), 2972 
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | 2973 AV_CODEC_CAP_SLICE_THREADS, 2974 .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES, 2975 .flush = vp8_decode_flush, 2976 UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context), 2977 .hw_configs = (const AVCodecHWConfigInternal *const []) { 2978 #if CONFIG_VP8_VAAPI_HWACCEL 2979 HWACCEL_VAAPI(vp8), 2980 #endif 2981 #if CONFIG_VP8_NVDEC_HWACCEL 2982 HWACCEL_NVDEC(vp8), 2983 #endif 2984 NULL 2985 }, 2986 }; 2987 #endif /* CONFIG_VP7_DECODER */