vp9.c (74944B)
1 /* 2 * VP9 compatible video decoder 3 * 4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> 5 * Copyright (C) 2013 Clément Bœsch <u pkh me> 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24 #include "config_components.h" 25 26 #include "avcodec.h" 27 #include "codec_internal.h" 28 #include "decode.h" 29 #include "get_bits.h" 30 #include "hwaccel_internal.h" 31 #include "hwconfig.h" 32 #include "profiles.h" 33 #include "progressframe.h" 34 #include "libavutil/refstruct.h" 35 #include "thread.h" 36 #include "pthread_internal.h" 37 38 #include "videodsp.h" 39 #include "vp89_rac.h" 40 #include "vp9.h" 41 #include "vp9data.h" 42 #include "vp9dec.h" 43 #include "vpx_rac.h" 44 #include "libavutil/avassert.h" 45 #include "libavutil/mem.h" 46 #include "libavutil/pixdesc.h" 47 #include "libavutil/video_enc_params.h" 48 49 #define VP9_SYNCCODE 0x498342 50 51 #if HAVE_THREADS 52 DEFINE_OFFSET_ARRAY(VP9Context, vp9_context, pthread_init_cnt, 53 (offsetof(VP9Context, progress_mutex)), 54 (offsetof(VP9Context, progress_cond))); 55 56 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { 57 VP9Context *s = avctx->priv_data; 58 59 if (avctx->active_thread_type & FF_THREAD_SLICE) { 60 if (s->entries) 61 av_freep(&s->entries); 62 63 
s->entries = av_malloc_array(n, sizeof(atomic_int)); 64 if (!s->entries) 65 return AVERROR(ENOMEM); 66 } 67 return 0; 68 } 69 70 static void vp9_report_tile_progress(VP9Context *s, int field, int n) { 71 pthread_mutex_lock(&s->progress_mutex); 72 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release); 73 pthread_cond_signal(&s->progress_cond); 74 pthread_mutex_unlock(&s->progress_mutex); 75 } 76 77 static void vp9_await_tile_progress(VP9Context *s, int field, int n) { 78 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n) 79 return; 80 81 pthread_mutex_lock(&s->progress_mutex); 82 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n) 83 pthread_cond_wait(&s->progress_cond, &s->progress_mutex); 84 pthread_mutex_unlock(&s->progress_mutex); 85 } 86 #else 87 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; } 88 #endif 89 90 static void vp9_tile_data_free(VP9TileData *td) 91 { 92 av_freep(&td->b_base); 93 av_freep(&td->block_base); 94 av_freep(&td->block_structure); 95 } 96 97 static void vp9_frame_unref(VP9Frame *f) 98 { 99 ff_progress_frame_unref(&f->tf); 100 av_refstruct_unref(&f->extradata); 101 av_refstruct_unref(&f->hwaccel_picture_private); 102 f->segmentation_map = NULL; 103 } 104 105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f) 106 { 107 VP9Context *s = avctx->priv_data; 108 int ret, sz; 109 110 ret = ff_progress_frame_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF); 111 if (ret < 0) 112 return ret; 113 114 sz = 64 * s->sb_cols * s->sb_rows; 115 if (sz != s->frame_extradata_pool_size) { 116 av_refstruct_pool_uninit(&s->frame_extradata_pool); 117 s->frame_extradata_pool = av_refstruct_pool_alloc(sz * (1 + sizeof(VP9mvrefPair)), 118 AV_REFSTRUCT_POOL_FLAG_ZERO_EVERY_TIME); 119 if (!s->frame_extradata_pool) { 120 s->frame_extradata_pool_size = 0; 121 ret = AVERROR(ENOMEM); 122 goto fail; 123 } 124 s->frame_extradata_pool_size = sz; 125 } 126 f->extradata = 
av_refstruct_pool_get(s->frame_extradata_pool);
    if (!f->extradata) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    // The pool buffer is carved up: segmentation map first (sz bytes),
    // then the motion-vector pairs.
    f->segmentation_map = f->extradata;
    f->mv = (VP9mvrefPair *) ((char*)f->extradata + sz);

    ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private);
    if (ret < 0)
        goto fail;

    return 0;

fail:
    vp9_frame_unref(f);
    return ret;
}

/* Make dst reference the same frame/extradata as src (replacing any
 * references dst previously held). */
static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
{
    ff_progress_frame_replace(&dst->tf, &src->tf);

    av_refstruct_replace(&dst->extradata, src->extradata);

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    av_refstruct_replace(&dst->hwaccel_picture_private,
                         src->hwaccel_picture_private);
}

/**
 * Handle a (possible) frame size or pixel format change: renegotiate the
 * output format (software or hwaccel), and reallocate the per-column
 * context buffers (intra prediction edges, above-context arrays, loop
 * filter levels) that depend on frame width.
 *
 * Returns 0 on success or a negative AVERROR code.
 */
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_D3D12VA_HWACCEL + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    // Renegotiate the pixel format only if dimensions or format changed
    // since the last successful ff_get_format() call.
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        // Offer the hwaccels that support this sw format, most-preferred
        // first; the sw format itself is appended as the fallback.
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_D3D12VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D12;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
            *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
            break;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
            break;
        case AV_PIX_FMT_YUV444P:
        case AV_PIX_FMT_YUV444P10:
        case AV_PIX_FMT_YUV444P12:
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_GBRP:
        case AV_PIX_FMT_GBRP10:
        case AV_PIX_FMT_GBRP12:
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;   // width in 8x8 blocks
    rows = (h + 7) >> 3;   // height in 8x8 blocks

    // Context buffers are still valid if the block grid and format match.
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6;   // width in 64x64 superblocks
    s->sb_rows = (h + 63) >> 6;   // height in 64x64 superblocks
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
    // With slice threading each superblock row needs its own lflvl entry.
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

// Carve consecutive sub-arrays out of the single allocation below;
// sizes are per superblock column.
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP9mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, lflvl_len);
#undef assign

    // Tile scratch buffers are sized from the old geometry; drop them so
    // update_block_buffers() reallocates.
    if (s->td) {
        for (i = 0; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);
    }

    // Reinitialize the DSP functions if the bit depth changed.
    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}

/**
 * (Re)allocate the per-tile block and coefficient buffers. In 2-pass mode
 * a single whole-frame buffer is used; otherwise each active tile column
 * gets a one-superblock scratch buffer.
 *
 * Returns 0 on success or AVERROR(ENOMEM).
 */
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // Nothing to do if buffers exist and the 2-pass mode didn't change.
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    vp9_tile_data_free(td);
    // Per-superblock chroma sizes, reduced by the subsampling factors.
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows,
sizeof(VP9Block));
        // Single whole-frame buffer: luma coefficients, then the two chroma
        // planes, then luma and chroma EOB arrays — all scaled by the
        // superblock count.
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
            if (!td->block_structure)
                return AVERROR(ENOMEM);
        }
    } else {
        // Not 2-pass: only td[0..active_tile_cols) are used; free the rest.
        for (i = 1; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);

        // Each tile column gets a one-superblock scratch buffer with the
        // same internal layout as the 2-pass buffer (sbs == 1).
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;

            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
                if (!s->td[i].block_structure)
                    return AVERROR(ENOMEM);
            }
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}

// The sign bit
// is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}

/* Inverse of the "recenter" mapping: undoes the fold that packs small
 * signed deltas around m into small nonnegative codes. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}

// differential forward probability updates
static int update_prob(VPXRangeCoder *c, int p)
{
    // Maps the decoded VLC index to the absolute probability delta; the
    // first 20 entries are the coarse "cheap" update values.
    static const uint8_t inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update.
     * For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // Three leading branch bits select coarser-to-finer delta ranges.
    if (!vp89_rac_get(c)) {
        d = vp89_rac_get_uint(c, 4) + 0;
    } else if (!vp89_rac_get(c)) {
        d = vp89_rac_get_uint(c, 4) + 16;
    } else if (!vp89_rac_get(c)) {
        d = vp89_rac_get_uint(c, 5) + 32;
    } else {
        d = vp89_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp89_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // Mirror around 128 so the delta is applied toward the nearer bound.
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}

/**
 * Parse the color config from the uncompressed header: bit depth (from the
 * profile), color space, range and chroma subsampling; derives pix_fmt,
 * bpp and bytesperpixel in the context.
 *
 * Returns 0 on success or AVERROR_INVALIDDATA on reserved/unsupported
 * combinations.
 */
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    // Profiles 0/1 are always 8-bit; profiles 2/3 code one bit for 10/12.
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB is always 4:4:4 full range.
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ?
AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; 476 if (avctx->profile & 1) { 477 s->ss_h = get_bits1(&s->gb); 478 s->ss_v = get_bits1(&s->gb); 479 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]; 480 if (s->pix_fmt == AV_PIX_FMT_YUV420P) { 481 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n", 482 avctx->profile); 483 return AVERROR_INVALIDDATA; 484 } else if (get_bits1(&s->gb)) { 485 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n", 486 avctx->profile); 487 return AVERROR_INVALIDDATA; 488 } 489 } else { 490 s->ss_h = s->ss_v = 1; 491 s->pix_fmt = pix_fmt_for_ss[bits][1][1]; 492 } 493 } 494 495 return 0; 496 } 497 498 static int decode_frame_header(AVCodecContext *avctx, 499 const uint8_t *data, int size, int *ref) 500 { 501 VP9Context *s = avctx->priv_data; 502 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp; 503 int last_invisible; 504 const uint8_t *data2; 505 506 /* general header */ 507 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) { 508 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n"); 509 return ret; 510 } 511 if (get_bits(&s->gb, 2) != 0x2) { // frame marker 512 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n"); 513 return AVERROR_INVALIDDATA; 514 } 515 avctx->profile = get_bits1(&s->gb); 516 avctx->profile |= get_bits1(&s->gb) << 1; 517 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb); 518 if (avctx->profile > 3) { 519 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile); 520 return AVERROR_INVALIDDATA; 521 } 522 s->s.h.profile = avctx->profile; 523 if (get_bits1(&s->gb)) { 524 *ref = get_bits(&s->gb, 3); 525 return 0; 526 } 527 528 s->last_keyframe = s->s.h.keyframe; 529 s->s.h.keyframe = !get_bits1(&s->gb); 530 531 last_invisible = s->s.h.invisible; 532 s->s.h.invisible = !get_bits1(&s->gb); 533 s->s.h.errorres = get_bits1(&s->gb); 534 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible; 535 536 if (s->s.h.keyframe) { 537 if 
(get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode 538 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); 539 return AVERROR_INVALIDDATA; 540 } 541 if ((ret = read_colorspace_details(avctx)) < 0) 542 return ret; 543 // for profile 1, here follows the subsampling bits 544 s->s.h.refreshrefmask = 0xff; 545 w = get_bits(&s->gb, 16) + 1; 546 h = get_bits(&s->gb, 16) + 1; 547 if (get_bits1(&s->gb)) // display size 548 skip_bits(&s->gb, 32); 549 } else { 550 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0; 551 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2); 552 if (s->s.h.intraonly) { 553 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode 554 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n"); 555 return AVERROR_INVALIDDATA; 556 } 557 if (avctx->profile >= 1) { 558 if ((ret = read_colorspace_details(avctx)) < 0) 559 return ret; 560 } else { 561 s->ss_h = s->ss_v = 1; 562 s->s.h.bpp = 8; 563 s->bpp_index = 0; 564 s->bytesperpixel = 1; 565 s->pix_fmt = AV_PIX_FMT_YUV420P; 566 avctx->colorspace = AVCOL_SPC_BT470BG; 567 avctx->color_range = AVCOL_RANGE_MPEG; 568 } 569 s->s.h.refreshrefmask = get_bits(&s->gb, 8); 570 w = get_bits(&s->gb, 16) + 1; 571 h = get_bits(&s->gb, 16) + 1; 572 if (get_bits1(&s->gb)) // display size 573 skip_bits(&s->gb, 32); 574 } else { 575 s->s.h.refreshrefmask = get_bits(&s->gb, 8); 576 s->s.h.refidx[0] = get_bits(&s->gb, 3); 577 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres; 578 s->s.h.refidx[1] = get_bits(&s->gb, 3); 579 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres; 580 s->s.h.refidx[2] = get_bits(&s->gb, 3); 581 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres; 582 if (!s->s.refs[s->s.h.refidx[0]].f || 583 !s->s.refs[s->s.h.refidx[1]].f || 584 !s->s.refs[s->s.h.refidx[2]].f) { 585 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n"); 586 return AVERROR_INVALIDDATA; 587 } 588 if (get_bits1(&s->gb)) { 589 w = s->s.refs[s->s.h.refidx[0]].f->width; 590 h = 
s->s.refs[s->s.h.refidx[0]].f->height; 591 } else if (get_bits1(&s->gb)) { 592 w = s->s.refs[s->s.h.refidx[1]].f->width; 593 h = s->s.refs[s->s.h.refidx[1]].f->height; 594 } else if (get_bits1(&s->gb)) { 595 w = s->s.refs[s->s.h.refidx[2]].f->width; 596 h = s->s.refs[s->s.h.refidx[2]].f->height; 597 } else { 598 w = get_bits(&s->gb, 16) + 1; 599 h = get_bits(&s->gb, 16) + 1; 600 } 601 // Note that in this code, "CUR_FRAME" is actually before we 602 // have formally allocated a frame, and thus actually represents 603 // the _last_ frame 604 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f && 605 s->s.frames[CUR_FRAME].tf.f->width == w && 606 s->s.frames[CUR_FRAME].tf.f->height == h; 607 if (get_bits1(&s->gb)) // display size 608 skip_bits(&s->gb, 32); 609 s->s.h.highprecisionmvs = get_bits1(&s->gb); 610 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE : 611 get_bits(&s->gb, 2); 612 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] || 613 s->s.h.signbias[0] != s->s.h.signbias[2]; 614 if (s->s.h.allowcompinter) { 615 if (s->s.h.signbias[0] == s->s.h.signbias[1]) { 616 s->s.h.fixcompref = 2; 617 s->s.h.varcompref[0] = 0; 618 s->s.h.varcompref[1] = 1; 619 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) { 620 s->s.h.fixcompref = 1; 621 s->s.h.varcompref[0] = 0; 622 s->s.h.varcompref[1] = 2; 623 } else { 624 s->s.h.fixcompref = 0; 625 s->s.h.varcompref[0] = 1; 626 s->s.h.varcompref[1] = 2; 627 } 628 } 629 } 630 } 631 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb); 632 s->s.h.parallelmode = s->s.h.errorres ? 
1 : get_bits1(&s->gb); 633 s->s.h.framectxid = c = get_bits(&s->gb, 2); 634 if (s->s.h.keyframe || s->s.h.intraonly) 635 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes 636 637 /* loopfilter header data */ 638 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) { 639 // reset loopfilter defaults 640 s->s.h.lf_delta.ref[0] = 1; 641 s->s.h.lf_delta.ref[1] = 0; 642 s->s.h.lf_delta.ref[2] = -1; 643 s->s.h.lf_delta.ref[3] = -1; 644 s->s.h.lf_delta.mode[0] = 0; 645 s->s.h.lf_delta.mode[1] = 0; 646 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat)); 647 } 648 s->s.h.filter.level = get_bits(&s->gb, 6); 649 sharp = get_bits(&s->gb, 3); 650 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep 651 // the old cache values since they are still valid 652 if (s->s.h.filter.sharpness != sharp) { 653 for (i = 1; i <= 63; i++) { 654 int limit = i; 655 656 if (sharp > 0) { 657 limit >>= (sharp + 3) >> 2; 658 limit = FFMIN(limit, 9 - sharp); 659 } 660 limit = FFMAX(limit, 1); 661 662 s->filter_lut.lim_lut[i] = limit; 663 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit; 664 } 665 } 666 s->s.h.filter.sharpness = sharp; 667 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) { 668 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) { 669 for (i = 0; i < 4; i++) 670 if (get_bits1(&s->gb)) 671 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6); 672 for (i = 0; i < 2; i++) 673 if (get_bits1(&s->gb)) 674 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6); 675 } 676 } 677 678 /* quantization header data */ 679 s->s.h.yac_qi = get_bits(&s->gb, 8); 680 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; 681 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0; 682 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? 
get_sbits_inv(&s->gb, 4) : 0; 683 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 && 684 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0; 685 #if FF_API_CODEC_PROPS 686 FF_DISABLE_DEPRECATION_WARNINGS 687 if (s->s.h.lossless) 688 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS; 689 FF_ENABLE_DEPRECATION_WARNINGS 690 #endif 691 692 /* segmentation header info */ 693 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) { 694 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) { 695 for (i = 0; i < 7; i++) 696 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ? 697 get_bits(&s->gb, 8) : 255; 698 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) 699 for (i = 0; i < 3; i++) 700 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ? 701 get_bits(&s->gb, 8) : 255; 702 } 703 704 if (get_bits1(&s->gb)) { 705 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb); 706 for (i = 0; i < 8; i++) { 707 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb))) 708 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8); 709 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb))) 710 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6); 711 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb))) 712 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2); 713 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb); 714 } 715 } 716 } else { 717 // Reset fields under segmentation switch if segmentation is disabled. 718 // This is necessary because some hwaccels don't ignore these fields 719 // if segmentation is disabled. 720 s->s.h.segmentation.temporal = 0; 721 s->s.h.segmentation.update_map = 0; 722 } 723 724 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas 725 for (i = 0; i < (s->s.h.segmentation.enabled ? 
8 : 1); i++) { 726 int qyac, qydc, quvac, quvdc, lflvl, sh; 727 728 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) { 729 if (s->s.h.segmentation.absolute_vals) 730 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8); 731 else 732 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8); 733 } else { 734 qyac = s->s.h.yac_qi; 735 } 736 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8); 737 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8); 738 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8); 739 qyac = av_clip_uintp2(qyac, 8); 740 741 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc]; 742 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac]; 743 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc]; 744 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac]; 745 746 sh = s->s.h.filter.level >= 32; 747 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) { 748 if (s->s.h.segmentation.absolute_vals) 749 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6); 750 else 751 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6); 752 } else { 753 lflvl = s->s.h.filter.level; 754 } 755 if (s->s.h.lf_delta.enabled) { 756 s->s.h.segmentation.feat[i].lflvl[0][0] = 757 s->s.h.segmentation.feat[i].lflvl[0][1] = 758 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6); 759 for (j = 1; j < 4; j++) { 760 s->s.h.segmentation.feat[i].lflvl[j][0] = 761 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + 762 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6); 763 s->s.h.segmentation.feat[i].lflvl[j][1] = 764 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] + 765 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6); 766 } 767 } else { 768 memset(s->s.h.segmentation.feat[i].lflvl, lflvl, 769 sizeof(s->s.h.segmentation.feat[i].lflvl)); 770 } 771 } 772 773 /* tiling info */ 774 
if ((ret = update_size(avctx, w, h)) < 0) { 775 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", 776 w, h, s->pix_fmt); 777 return ret; 778 } 779 for (s->s.h.tiling.log2_tile_cols = 0; 780 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols); 781 s->s.h.tiling.log2_tile_cols++) ; 782 for (max = 0; (s->sb_cols >> max) >= 4; max++) ; 783 max = FFMAX(0, max - 1); 784 while (max > s->s.h.tiling.log2_tile_cols) { 785 if (get_bits1(&s->gb)) 786 s->s.h.tiling.log2_tile_cols++; 787 else 788 break; 789 } 790 s->s.h.tiling.log2_tile_rows = decode012(&s->gb); 791 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows; 792 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) { 793 int n_range_coders; 794 VPXRangeCoder *rc; 795 796 if (s->td) { 797 for (i = 0; i < s->active_tile_cols; i++) 798 vp9_tile_data_free(&s->td[i]); 799 av_freep(&s->td); 800 } 801 802 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols; 803 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ? 
804 s->s.h.tiling.tile_cols : 1; 805 vp9_alloc_entries(avctx, s->sb_rows); 806 if (avctx->active_thread_type == FF_THREAD_SLICE) { 807 n_range_coders = 4; // max_tile_rows 808 } else { 809 n_range_coders = s->s.h.tiling.tile_cols; 810 } 811 s->td = av_calloc(s->active_tile_cols, sizeof(VP9TileData) + 812 n_range_coders * sizeof(VPXRangeCoder)); 813 if (!s->td) 814 return AVERROR(ENOMEM); 815 rc = (VPXRangeCoder *) &s->td[s->active_tile_cols]; 816 for (i = 0; i < s->active_tile_cols; i++) { 817 s->td[i].s = s; 818 s->td[i].c_b = rc; 819 rc += n_range_coders; 820 } 821 } 822 823 /* check reference frames */ 824 if (!s->s.h.keyframe && !s->s.h.intraonly) { 825 int valid_ref_frame = 0; 826 for (i = 0; i < 3; i++) { 827 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f; 828 int refw = ref->width, refh = ref->height; 829 830 if (ref->format != avctx->pix_fmt) { 831 av_log(avctx, AV_LOG_ERROR, 832 "Ref pixfmt (%s) did not match current frame (%s)", 833 av_get_pix_fmt_name(ref->format), 834 av_get_pix_fmt_name(avctx->pix_fmt)); 835 return AVERROR_INVALIDDATA; 836 } else if (refw == w && refh == h) { 837 s->mvscale[i][0] = s->mvscale[i][1] = 0; 838 } else { 839 /* Check to make sure at least one of frames that */ 840 /* this frame references has valid dimensions */ 841 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) { 842 av_log(avctx, AV_LOG_WARNING, 843 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n", 844 refw, refh, w, h); 845 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE; 846 continue; 847 } 848 s->mvscale[i][0] = (refw << 14) / w; 849 s->mvscale[i][1] = (refh << 14) / h; 850 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14; 851 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14; 852 } 853 valid_ref_frame++; 854 } 855 if (!valid_ref_frame) { 856 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n"); 857 return AVERROR_INVALIDDATA; 858 } 859 } 860 861 if (s->s.h.keyframe || s->s.h.errorres || 
(s->s.h.intraonly && s->s.h.resetctx == 3)) { 862 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p = 863 s->prob_ctx[3].p = ff_vp9_default_probs; 864 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs, 865 sizeof(ff_vp9_default_coef_probs)); 866 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs, 867 sizeof(ff_vp9_default_coef_probs)); 868 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs, 869 sizeof(ff_vp9_default_coef_probs)); 870 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs, 871 sizeof(ff_vp9_default_coef_probs)); 872 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) { 873 s->prob_ctx[c].p = ff_vp9_default_probs; 874 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs, 875 sizeof(ff_vp9_default_coef_probs)); 876 } 877 878 // next 16 bits is size of the rest of the header (arith-coded) 879 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16); 880 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8; 881 882 data2 = align_get_bits(&s->gb); 883 if (size2 > size - (data2 - data)) { 884 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n"); 885 return AVERROR_INVALIDDATA; 886 } 887 ret = ff_vpx_init_range_decoder(&s->c, data2, size2); 888 if (ret < 0) 889 return ret; 890 891 if (vpx_rac_get_prob_branchy(&s->c, 128)) { // marker bit 892 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n"); 893 return AVERROR_INVALIDDATA; 894 } 895 896 for (i = 0; i < s->active_tile_cols; i++) { 897 if (s->s.h.keyframe || s->s.h.intraonly) { 898 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef)); 899 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob)); 900 } else { 901 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts)); 902 } 903 s->td[i].nb_block_structure = 0; 904 } 905 906 /* FIXME is it faster to not copy here, but do it down in the fw updates 907 * as explicit copies if the fw update is missing (and skip the copy upon 908 * fw update)? 
*/ 909 s->prob.p = s->prob_ctx[c].p; 910 911 // txfm updates 912 if (s->s.h.lossless) { 913 s->s.h.txfmmode = TX_4X4; 914 } else { 915 s->s.h.txfmmode = vp89_rac_get_uint(&s->c, 2); 916 if (s->s.h.txfmmode == 3) 917 s->s.h.txfmmode += vp89_rac_get(&s->c); 918 919 if (s->s.h.txfmmode == TX_SWITCHABLE) { 920 for (i = 0; i < 2; i++) 921 if (vpx_rac_get_prob_branchy(&s->c, 252)) 922 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]); 923 for (i = 0; i < 2; i++) 924 for (j = 0; j < 2; j++) 925 if (vpx_rac_get_prob_branchy(&s->c, 252)) 926 s->prob.p.tx16p[i][j] = 927 update_prob(&s->c, s->prob.p.tx16p[i][j]); 928 for (i = 0; i < 2; i++) 929 for (j = 0; j < 3; j++) 930 if (vpx_rac_get_prob_branchy(&s->c, 252)) 931 s->prob.p.tx32p[i][j] = 932 update_prob(&s->c, s->prob.p.tx32p[i][j]); 933 } 934 } 935 936 // coef updates 937 for (i = 0; i < 4; i++) { 938 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i]; 939 if (vp89_rac_get(&s->c)) { 940 for (j = 0; j < 2; j++) 941 for (k = 0; k < 2; k++) 942 for (l = 0; l < 6; l++) 943 for (m = 0; m < 6; m++) { 944 uint8_t *p = s->prob.coef[i][j][k][l][m]; 945 uint8_t *r = ref[j][k][l][m]; 946 if (m >= 3 && l == 0) // dc only has 3 pt 947 break; 948 for (n = 0; n < 3; n++) { 949 if (vpx_rac_get_prob_branchy(&s->c, 252)) 950 p[n] = update_prob(&s->c, r[n]); 951 else 952 p[n] = r[n]; 953 } 954 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8); 955 } 956 } else { 957 for (j = 0; j < 2; j++) 958 for (k = 0; k < 2; k++) 959 for (l = 0; l < 6; l++) 960 for (m = 0; m < 6; m++) { 961 uint8_t *p = s->prob.coef[i][j][k][l][m]; 962 uint8_t *r = ref[j][k][l][m]; 963 if (m > 3 && l == 0) // dc only has 3 pt 964 break; 965 memcpy(p, r, 3); 966 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8); 967 } 968 } 969 if (s->s.h.txfmmode == i) 970 break; 971 } 972 973 // mode updates 974 for (i = 0; i < 3; i++) 975 if (vpx_rac_get_prob_branchy(&s->c, 252)) 976 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]); 977 if (!s->s.h.keyframe && 
!s->s.h.intraonly) { 978 for (i = 0; i < 7; i++) 979 for (j = 0; j < 3; j++) 980 if (vpx_rac_get_prob_branchy(&s->c, 252)) 981 s->prob.p.mv_mode[i][j] = 982 update_prob(&s->c, s->prob.p.mv_mode[i][j]); 983 984 if (s->s.h.filtermode == FILTER_SWITCHABLE) 985 for (i = 0; i < 4; i++) 986 for (j = 0; j < 2; j++) 987 if (vpx_rac_get_prob_branchy(&s->c, 252)) 988 s->prob.p.filter[i][j] = 989 update_prob(&s->c, s->prob.p.filter[i][j]); 990 991 for (i = 0; i < 4; i++) 992 if (vpx_rac_get_prob_branchy(&s->c, 252)) 993 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]); 994 995 if (s->s.h.allowcompinter) { 996 s->s.h.comppredmode = vp89_rac_get(&s->c); 997 if (s->s.h.comppredmode) 998 s->s.h.comppredmode += vp89_rac_get(&s->c); 999 if (s->s.h.comppredmode == PRED_SWITCHABLE) 1000 for (i = 0; i < 5; i++) 1001 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1002 s->prob.p.comp[i] = 1003 update_prob(&s->c, s->prob.p.comp[i]); 1004 } else { 1005 s->s.h.comppredmode = PRED_SINGLEREF; 1006 } 1007 1008 if (s->s.h.comppredmode != PRED_COMPREF) { 1009 for (i = 0; i < 5; i++) { 1010 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1011 s->prob.p.single_ref[i][0] = 1012 update_prob(&s->c, s->prob.p.single_ref[i][0]); 1013 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1014 s->prob.p.single_ref[i][1] = 1015 update_prob(&s->c, s->prob.p.single_ref[i][1]); 1016 } 1017 } 1018 1019 if (s->s.h.comppredmode != PRED_SINGLEREF) { 1020 for (i = 0; i < 5; i++) 1021 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1022 s->prob.p.comp_ref[i] = 1023 update_prob(&s->c, s->prob.p.comp_ref[i]); 1024 } 1025 1026 for (i = 0; i < 4; i++) 1027 for (j = 0; j < 9; j++) 1028 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1029 s->prob.p.y_mode[i][j] = 1030 update_prob(&s->c, s->prob.p.y_mode[i][j]); 1031 1032 for (i = 0; i < 4; i++) 1033 for (j = 0; j < 4; j++) 1034 for (k = 0; k < 3; k++) 1035 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1036 s->prob.p.partition[3 - i][j][k] = 1037 update_prob(&s->c, 1038 s->prob.p.partition[3 - 
i][j][k]); 1039 1040 // mv fields don't use the update_prob subexp model for some reason 1041 for (i = 0; i < 3; i++) 1042 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1043 s->prob.p.mv_joint[i] = (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1044 1045 for (i = 0; i < 2; i++) { 1046 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1047 s->prob.p.mv_comp[i].sign = 1048 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1049 1050 for (j = 0; j < 10; j++) 1051 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1052 s->prob.p.mv_comp[i].classes[j] = 1053 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1054 1055 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1056 s->prob.p.mv_comp[i].class0 = 1057 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1058 1059 for (j = 0; j < 10; j++) 1060 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1061 s->prob.p.mv_comp[i].bits[j] = 1062 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1063 } 1064 1065 for (i = 0; i < 2; i++) { 1066 for (j = 0; j < 2; j++) 1067 for (k = 0; k < 3; k++) 1068 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1069 s->prob.p.mv_comp[i].class0_fp[j][k] = 1070 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1071 1072 for (j = 0; j < 3; j++) 1073 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1074 s->prob.p.mv_comp[i].fp[j] = 1075 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1076 } 1077 1078 if (s->s.h.highprecisionmvs) { 1079 for (i = 0; i < 2; i++) { 1080 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1081 s->prob.p.mv_comp[i].class0_hp = 1082 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1083 1084 if (vpx_rac_get_prob_branchy(&s->c, 252)) 1085 s->prob.p.mv_comp[i].hp = 1086 (vp89_rac_get_uint(&s->c, 7) << 1) | 1; 1087 } 1088 } 1089 } 1090 1091 return (data2 - data) + size2; 1092 } 1093 1094 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl, 1095 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl) 1096 { 1097 const VP9Context *s = td->s; 1098 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) | 1099 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1); 1100 const uint8_t *p = 
s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] : 1101 s->prob.p.partition[bl][c]; 1102 enum BlockPartition bp; 1103 ptrdiff_t hbs = 4 >> bl; 1104 AVFrame *f = s->s.frames[CUR_FRAME].tf.f; 1105 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; 1106 int bytesperpixel = s->bytesperpixel; 1107 1108 if (bl == BL_8X8) { 1109 bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p); 1110 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1111 } else if (col + hbs < s->cols) { // FIXME why not <=? 1112 if (row + hbs < s->rows) { // FIXME why not <=? 1113 bp = vp89_rac_get_tree(td->c, ff_vp9_partition_tree, p); 1114 switch (bp) { 1115 case PARTITION_NONE: 1116 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1117 break; 1118 case PARTITION_H: 1119 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1120 yoff += hbs * 8 * y_stride; 1121 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1122 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp); 1123 break; 1124 case PARTITION_V: 1125 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1126 yoff += hbs * 8 * bytesperpixel; 1127 uvoff += hbs * 8 * bytesperpixel >> s->ss_h; 1128 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp); 1129 break; 1130 case PARTITION_SPLIT: 1131 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); 1132 decode_sb(td, row, col + hbs, lflvl, 1133 yoff + 8 * hbs * bytesperpixel, 1134 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); 1135 yoff += hbs * 8 * y_stride; 1136 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1137 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); 1138 decode_sb(td, row + hbs, col + hbs, lflvl, 1139 yoff + 8 * hbs * bytesperpixel, 1140 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); 1141 break; 1142 default: 1143 av_assert0(0); 1144 } 1145 } else if (vpx_rac_get_prob_branchy(td->c, p[1])) { 1146 bp = PARTITION_SPLIT; 1147 decode_sb(td, row, col, lflvl, 
yoff, uvoff, bl + 1); 1148 decode_sb(td, row, col + hbs, lflvl, 1149 yoff + 8 * hbs * bytesperpixel, 1150 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); 1151 } else { 1152 bp = PARTITION_H; 1153 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1154 } 1155 } else if (row + hbs < s->rows) { // FIXME why not <=? 1156 if (vpx_rac_get_prob_branchy(td->c, p[2])) { 1157 bp = PARTITION_SPLIT; 1158 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); 1159 yoff += hbs * 8 * y_stride; 1160 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1161 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); 1162 } else { 1163 bp = PARTITION_V; 1164 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp); 1165 } 1166 } else { 1167 bp = PARTITION_SPLIT; 1168 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1); 1169 } 1170 td->counts.partition[bl][c][bp]++; 1171 } 1172 1173 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl, 1174 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl) 1175 { 1176 const VP9Context *s = td->s; 1177 VP9Block *b = td->b; 1178 ptrdiff_t hbs = 4 >> bl; 1179 AVFrame *f = s->s.frames[CUR_FRAME].tf.f; 1180 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1]; 1181 int bytesperpixel = s->bytesperpixel; 1182 1183 if (bl == BL_8X8) { 1184 av_assert2(b->bl == BL_8X8); 1185 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp); 1186 } else if (td->b->bl == bl) { 1187 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp); 1188 if (b->bp == PARTITION_H && row + hbs < s->rows) { 1189 yoff += hbs * 8 * y_stride; 1190 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1191 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp); 1192 } else if (b->bp == PARTITION_V && col + hbs < s->cols) { 1193 yoff += hbs * 8 * bytesperpixel; 1194 uvoff += hbs * 8 * bytesperpixel >> s->ss_h; 1195 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp); 1196 } 1197 } else { 1198 
decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1); 1199 if (col + hbs < s->cols) { // FIXME why not <=? 1200 if (row + hbs < s->rows) { 1201 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel, 1202 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); 1203 yoff += hbs * 8 * y_stride; 1204 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1205 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); 1206 decode_sb_mem(td, row + hbs, col + hbs, lflvl, 1207 yoff + 8 * hbs * bytesperpixel, 1208 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1); 1209 } else { 1210 yoff += hbs * 8 * bytesperpixel; 1211 uvoff += hbs * 8 * bytesperpixel >> s->ss_h; 1212 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1); 1213 } 1214 } else if (row + hbs < s->rows) { 1215 yoff += hbs * 8 * y_stride; 1216 uvoff += hbs * 8 * uv_stride >> s->ss_v; 1217 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1); 1218 } 1219 } 1220 } 1221 1222 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n) 1223 { 1224 int sb_start = ( idx * n) >> log2_n; 1225 int sb_end = ((idx + 1) * n) >> log2_n; 1226 *start = FFMIN(sb_start, n) << 3; 1227 *end = FFMIN(sb_end, n) << 3; 1228 } 1229 1230 static void free_buffers(VP9Context *s) 1231 { 1232 int i; 1233 1234 av_freep(&s->intra_pred_data[0]); 1235 for (i = 0; i < s->active_tile_cols; i++) 1236 vp9_tile_data_free(&s->td[i]); 1237 } 1238 1239 static av_cold int vp9_decode_free(AVCodecContext *avctx) 1240 { 1241 VP9Context *s = avctx->priv_data; 1242 int i; 1243 1244 for (int i = 0; i < 3; i++) 1245 vp9_frame_unref(&s->s.frames[i]); 1246 av_refstruct_pool_uninit(&s->frame_extradata_pool); 1247 for (i = 0; i < 8; i++) { 1248 ff_progress_frame_unref(&s->s.refs[i]); 1249 ff_progress_frame_unref(&s->next_refs[i]); 1250 } 1251 1252 free_buffers(s); 1253 #if HAVE_THREADS 1254 av_freep(&s->entries); 1255 ff_pthread_free(s, vp9_context_offsets); 1256 #endif 1257 av_freep(&s->td); 1258 return 0; 1259 } 
1260 1261 static int decode_tiles(AVCodecContext *avctx, 1262 const uint8_t *data, int size) 1263 { 1264 VP9Context *s = avctx->priv_data; 1265 VP9TileData *td = &s->td[0]; 1266 int row, col, tile_row, tile_col, ret; 1267 int bytesperpixel; 1268 int tile_row_start, tile_row_end, tile_col_start, tile_col_end; 1269 AVFrame *f; 1270 ptrdiff_t yoff, uvoff, ls_y, ls_uv; 1271 1272 f = s->s.frames[CUR_FRAME].tf.f; 1273 ls_y = f->linesize[0]; 1274 ls_uv =f->linesize[1]; 1275 bytesperpixel = s->bytesperpixel; 1276 1277 yoff = uvoff = 0; 1278 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { 1279 set_tile_offset(&tile_row_start, &tile_row_end, 1280 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows); 1281 1282 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { 1283 int64_t tile_size; 1284 1285 if (tile_col == s->s.h.tiling.tile_cols - 1 && 1286 tile_row == s->s.h.tiling.tile_rows - 1) { 1287 tile_size = size; 1288 } else { 1289 tile_size = AV_RB32(data); 1290 data += 4; 1291 size -= 4; 1292 } 1293 if (tile_size > size) 1294 return AVERROR_INVALIDDATA; 1295 ret = ff_vpx_init_range_decoder(&td->c_b[tile_col], data, tile_size); 1296 if (ret < 0) 1297 return ret; 1298 if (vpx_rac_get_prob_branchy(&td->c_b[tile_col], 128)) // marker bit 1299 return AVERROR_INVALIDDATA; 1300 data += tile_size; 1301 size -= tile_size; 1302 } 1303 1304 for (row = tile_row_start; row < tile_row_end; 1305 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) { 1306 VP9Filter *lflvl_ptr = s->lflvl; 1307 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; 1308 1309 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { 1310 set_tile_offset(&tile_col_start, &tile_col_end, 1311 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols); 1312 td->tile_col_start = tile_col_start; 1313 if (s->pass != 2) { 1314 memset(td->left_partition_ctx, 0, 8); 1315 memset(td->left_skip_ctx, 0, 8); 1316 if (s->s.h.keyframe || s->s.h.intraonly) { 1317 memset(td->left_mode_ctx, 
DC_PRED, 16); 1318 } else { 1319 memset(td->left_mode_ctx, NEARESTMV, 8); 1320 } 1321 memset(td->left_y_nnz_ctx, 0, 16); 1322 memset(td->left_uv_nnz_ctx, 0, 32); 1323 memset(td->left_segpred_ctx, 0, 8); 1324 1325 td->c = &td->c_b[tile_col]; 1326 } 1327 1328 for (col = tile_col_start; 1329 col < tile_col_end; 1330 col += 8, yoff2 += 64 * bytesperpixel, 1331 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { 1332 // FIXME integrate with lf code (i.e. zero after each 1333 // use, similar to invtxfm coefficients, or similar) 1334 if (s->pass != 1) { 1335 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask)); 1336 } 1337 1338 if (s->pass == 2) { 1339 decode_sb_mem(td, row, col, lflvl_ptr, 1340 yoff2, uvoff2, BL_64X64); 1341 } else { 1342 if (vpx_rac_is_end(td->c)) { 1343 return AVERROR_INVALIDDATA; 1344 } 1345 decode_sb(td, row, col, lflvl_ptr, 1346 yoff2, uvoff2, BL_64X64); 1347 } 1348 } 1349 } 1350 1351 if (s->pass == 1) 1352 continue; 1353 1354 // backup pre-loopfilter reconstruction data for intra 1355 // prediction of next row of sb64s 1356 if (row + 8 < s->rows) { 1357 memcpy(s->intra_pred_data[0], 1358 f->data[0] + yoff + 63 * ls_y, 1359 8 * s->cols * bytesperpixel); 1360 memcpy(s->intra_pred_data[1], 1361 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, 1362 8 * s->cols * bytesperpixel >> s->ss_h); 1363 memcpy(s->intra_pred_data[2], 1364 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, 1365 8 * s->cols * bytesperpixel >> s->ss_h); 1366 } 1367 1368 // loopfilter one row 1369 if (s->s.h.filter.level) { 1370 yoff2 = yoff; 1371 uvoff2 = uvoff; 1372 lflvl_ptr = s->lflvl; 1373 for (col = 0; col < s->cols; 1374 col += 8, yoff2 += 64 * bytesperpixel, 1375 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { 1376 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col, 1377 yoff2, uvoff2); 1378 } 1379 } 1380 1381 // FIXME maybe we can make this more finegrained by running the 1382 // loopfilter per-block instead of after each sbrow 1383 // In fact that would also 
make intra pred left preparation easier? 1384 ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, row >> 3); 1385 } 1386 } 1387 return 0; 1388 } 1389 1390 #if HAVE_THREADS 1391 static av_always_inline 1392 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr, 1393 int threadnr) 1394 { 1395 VP9Context *s = avctx->priv_data; 1396 VP9TileData *td = &s->td[jobnr]; 1397 ptrdiff_t uvoff, yoff, ls_y, ls_uv; 1398 int bytesperpixel = s->bytesperpixel, row, col, tile_row; 1399 unsigned tile_cols_len; 1400 int tile_row_start, tile_row_end, tile_col_start, tile_col_end; 1401 VP9Filter *lflvl_ptr_base; 1402 AVFrame *f; 1403 1404 f = s->s.frames[CUR_FRAME].tf.f; 1405 ls_y = f->linesize[0]; 1406 ls_uv =f->linesize[1]; 1407 1408 set_tile_offset(&tile_col_start, &tile_col_end, 1409 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols); 1410 td->tile_col_start = tile_col_start; 1411 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3); 1412 yoff = (64 * bytesperpixel)*(tile_col_start >> 3); 1413 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3); 1414 1415 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { 1416 set_tile_offset(&tile_row_start, &tile_row_end, 1417 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows); 1418 1419 td->c = &td->c_b[tile_row]; 1420 for (row = tile_row_start; row < tile_row_end; 1421 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) { 1422 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; 1423 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3); 1424 1425 memset(td->left_partition_ctx, 0, 8); 1426 memset(td->left_skip_ctx, 0, 8); 1427 if (s->s.h.keyframe || s->s.h.intraonly) { 1428 memset(td->left_mode_ctx, DC_PRED, 16); 1429 } else { 1430 memset(td->left_mode_ctx, NEARESTMV, 8); 1431 } 1432 memset(td->left_y_nnz_ctx, 0, 16); 1433 memset(td->left_uv_nnz_ctx, 0, 32); 1434 memset(td->left_segpred_ctx, 0, 8); 1435 1436 for (col = tile_col_start; 1437 col < tile_col_end; 1438 col += 8, yoff2 += 64 * bytesperpixel, 1439 
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { 1440 // FIXME integrate with lf code (i.e. zero after each 1441 // use, similar to invtxfm coefficients, or similar) 1442 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask)); 1443 decode_sb(td, row, col, lflvl_ptr, 1444 yoff2, uvoff2, BL_64X64); 1445 } 1446 1447 // backup pre-loopfilter reconstruction data for intra 1448 // prediction of next row of sb64s 1449 tile_cols_len = tile_col_end - tile_col_start; 1450 if (row + 8 < s->rows) { 1451 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel), 1452 f->data[0] + yoff + 63 * ls_y, 1453 8 * tile_cols_len * bytesperpixel); 1454 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h), 1455 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, 1456 8 * tile_cols_len * bytesperpixel >> s->ss_h); 1457 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h), 1458 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv, 1459 8 * tile_cols_len * bytesperpixel >> s->ss_h); 1460 } 1461 1462 vp9_report_tile_progress(s, row >> 3, 1); 1463 } 1464 } 1465 return 0; 1466 } 1467 1468 static av_always_inline 1469 int loopfilter_proc(AVCodecContext *avctx) 1470 { 1471 VP9Context *s = avctx->priv_data; 1472 ptrdiff_t uvoff, yoff, ls_y, ls_uv; 1473 VP9Filter *lflvl_ptr; 1474 int bytesperpixel = s->bytesperpixel, col, i; 1475 AVFrame *f; 1476 1477 f = s->s.frames[CUR_FRAME].tf.f; 1478 ls_y = f->linesize[0]; 1479 ls_uv =f->linesize[1]; 1480 1481 for (i = 0; i < s->sb_rows; i++) { 1482 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols); 1483 1484 if (s->s.h.filter.level) { 1485 yoff = (ls_y * 64)*i; 1486 uvoff = (ls_uv * 64 >> s->ss_v)*i; 1487 lflvl_ptr = s->lflvl+s->sb_cols*i; 1488 for (col = 0; col < s->cols; 1489 col += 8, yoff += 64 * bytesperpixel, 1490 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) { 1491 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col, 1492 yoff, uvoff); 1493 } 1494 } 1495 } 1496 return 0; 1497 } 
1498 #endif 1499 1500 static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame) 1501 { 1502 AVVideoEncParams *par; 1503 unsigned int tile, nb_blocks = 0; 1504 1505 if (s->s.h.segmentation.enabled) { 1506 for (tile = 0; tile < s->active_tile_cols; tile++) 1507 nb_blocks += s->td[tile].nb_block_structure; 1508 } 1509 1510 par = av_video_enc_params_create_side_data(frame->tf.f, 1511 AV_VIDEO_ENC_PARAMS_VP9, nb_blocks); 1512 if (!par) 1513 return AVERROR(ENOMEM); 1514 1515 par->qp = s->s.h.yac_qi; 1516 par->delta_qp[0][0] = s->s.h.ydc_qdelta; 1517 par->delta_qp[1][0] = s->s.h.uvdc_qdelta; 1518 par->delta_qp[2][0] = s->s.h.uvdc_qdelta; 1519 par->delta_qp[1][1] = s->s.h.uvac_qdelta; 1520 par->delta_qp[2][1] = s->s.h.uvac_qdelta; 1521 1522 if (nb_blocks) { 1523 unsigned int block = 0; 1524 unsigned int tile, block_tile; 1525 1526 for (tile = 0; tile < s->active_tile_cols; tile++) { 1527 VP9TileData *td = &s->td[tile]; 1528 1529 for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) { 1530 AVVideoBlockParams *b = av_video_enc_params_block(par, block++); 1531 unsigned int row = td->block_structure[block_tile].row; 1532 unsigned int col = td->block_structure[block_tile].col; 1533 uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col]; 1534 1535 b->src_x = col * 8; 1536 b->src_y = row * 8; 1537 b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x); 1538 b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y); 1539 1540 if (s->s.h.segmentation.feat[seg_id].q_enabled) { 1541 b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val; 1542 if (s->s.h.segmentation.absolute_vals) 1543 b->delta_qp -= par->qp; 1544 } 1545 } 1546 } 1547 } 1548 1549 return 0; 1550 } 1551 1552 static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame, 1553 int *got_frame, AVPacket *pkt) 1554 { 1555 const uint8_t *data = pkt->data; 1556 int size = pkt->size; 1557 VP9Context *s = avctx->priv_data; 1558 int ret, i, j, ref; 1559 int 
retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map && 1560 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map); 1561 const VP9Frame *src; 1562 AVFrame *f; 1563 1564 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) { 1565 return ret; 1566 } else if (ret == 0) { 1567 if (!s->s.refs[ref].f) { 1568 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref); 1569 return AVERROR_INVALIDDATA; 1570 } 1571 for (int i = 0; i < 8; i++) 1572 ff_progress_frame_replace(&s->next_refs[i], &s->s.refs[i]); 1573 ff_thread_finish_setup(avctx); 1574 ff_progress_frame_await(&s->s.refs[ref], INT_MAX); 1575 1576 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0) 1577 return ret; 1578 frame->pts = pkt->pts; 1579 frame->pkt_dts = pkt->dts; 1580 *got_frame = 1; 1581 return pkt->size; 1582 } 1583 data += ret; 1584 size -= ret; 1585 1586 src = !s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres ? 1587 &s->s.frames[CUR_FRAME] : &s->s.frames[BLANK_FRAME]; 1588 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) 1589 vp9_frame_replace(&s->s.frames[REF_FRAME_SEGMAP], src); 1590 vp9_frame_replace(&s->s.frames[REF_FRAME_MVPAIR], src); 1591 vp9_frame_unref(&s->s.frames[CUR_FRAME]); 1592 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0) 1593 return ret; 1594 f = s->s.frames[CUR_FRAME].tf.f; 1595 if (s->s.h.keyframe) 1596 f->flags |= AV_FRAME_FLAG_KEY; 1597 else 1598 f->flags &= ~AV_FRAME_FLAG_KEY; 1599 if (s->s.h.lossless) 1600 f->flags |= AV_FRAME_FLAG_LOSSLESS; 1601 else 1602 f->flags &= ~AV_FRAME_FLAG_LOSSLESS; 1603 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? 
AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; 1604 1605 // Non-existent frames have the implicit dimension 0x0 != CUR_FRAME 1606 if (!s->s.frames[REF_FRAME_MVPAIR].tf.f || 1607 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width || 1608 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) { 1609 vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]); 1610 } 1611 1612 // ref frame setup 1613 for (i = 0; i < 8; i++) { 1614 ff_progress_frame_replace(&s->next_refs[i], 1615 s->s.h.refreshrefmask & (1 << i) ? 1616 &s->s.frames[CUR_FRAME].tf : &s->s.refs[i]); 1617 } 1618 1619 if (avctx->hwaccel) { 1620 const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel); 1621 ret = hwaccel->start_frame(avctx, NULL, 0); 1622 if (ret < 0) 1623 return ret; 1624 ret = hwaccel->decode_slice(avctx, pkt->data, pkt->size); 1625 if (ret < 0) 1626 return ret; 1627 ret = hwaccel->end_frame(avctx); 1628 if (ret < 0) 1629 return ret; 1630 goto finish; 1631 } 1632 1633 // main tile decode loop 1634 memset(s->above_partition_ctx, 0, s->cols); 1635 memset(s->above_skip_ctx, 0, s->cols); 1636 if (s->s.h.keyframe || s->s.h.intraonly) { 1637 memset(s->above_mode_ctx, DC_PRED, s->cols * 2); 1638 } else { 1639 memset(s->above_mode_ctx, NEARESTMV, s->cols); 1640 } 1641 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16); 1642 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h); 1643 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h); 1644 memset(s->above_segpred_ctx, 0, s->cols); 1645 s->pass = s->s.frames[CUR_FRAME].uses_2pass = 1646 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode; 1647 if ((ret = update_block_buffers(avctx)) < 0) { 1648 av_log(avctx, AV_LOG_ERROR, 1649 "Failed to allocate block buffers\n"); 1650 return ret; 1651 } 1652 if (s->s.h.refreshctx && s->s.h.parallelmode) { 1653 int j, k, l, m; 1654 1655 for (i = 0; i < 4; i++) { 1656 for (j = 0; j < 2; j++) 1657 for (k = 0; k < 2; k++) 1658 for 
(l = 0; l < 6; l++) 1659 for (m = 0; m < 6; m++) 1660 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m], 1661 s->prob.coef[i][j][k][l][m], 3); 1662 if (s->s.h.txfmmode == i) 1663 break; 1664 } 1665 s->prob_ctx[s->s.h.framectxid].p = s->prob.p; 1666 ff_thread_finish_setup(avctx); 1667 } else if (!s->s.h.refreshctx) { 1668 ff_thread_finish_setup(avctx); 1669 } 1670 1671 #if HAVE_THREADS 1672 if (avctx->active_thread_type & FF_THREAD_SLICE) { 1673 for (i = 0; i < s->sb_rows; i++) 1674 atomic_init(&s->entries[i], 0); 1675 } 1676 #endif 1677 1678 do { 1679 for (i = 0; i < s->active_tile_cols; i++) { 1680 s->td[i].b = s->td[i].b_base; 1681 s->td[i].block = s->td[i].block_base; 1682 s->td[i].uvblock[0] = s->td[i].uvblock_base[0]; 1683 s->td[i].uvblock[1] = s->td[i].uvblock_base[1]; 1684 s->td[i].eob = s->td[i].eob_base; 1685 s->td[i].uveob[0] = s->td[i].uveob_base[0]; 1686 s->td[i].uveob[1] = s->td[i].uveob_base[1]; 1687 s->td[i].error_info = 0; 1688 } 1689 1690 #if HAVE_THREADS 1691 if (avctx->active_thread_type == FF_THREAD_SLICE) { 1692 int tile_row, tile_col; 1693 1694 av_assert1(!s->pass); 1695 1696 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) { 1697 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) { 1698 int64_t tile_size; 1699 1700 if (tile_col == s->s.h.tiling.tile_cols - 1 && 1701 tile_row == s->s.h.tiling.tile_rows - 1) { 1702 tile_size = size; 1703 } else { 1704 tile_size = AV_RB32(data); 1705 data += 4; 1706 size -= 4; 1707 } 1708 if (tile_size > size) 1709 return AVERROR_INVALIDDATA; 1710 ret = ff_vpx_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size); 1711 if (ret < 0) 1712 return ret; 1713 if (vpx_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit 1714 return AVERROR_INVALIDDATA; 1715 data += tile_size; 1716 size -= tile_size; 1717 } 1718 } 1719 1720 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols); 1721 
} else 1722 #endif 1723 { 1724 ret = decode_tiles(avctx, data, size); 1725 if (ret < 0) 1726 goto fail; 1727 } 1728 1729 // Sum all counts fields into td[0].counts for tile threading 1730 if (avctx->active_thread_type == FF_THREAD_SLICE) 1731 for (i = 1; i < s->s.h.tiling.tile_cols; i++) 1732 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++) 1733 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j]; 1734 1735 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) { 1736 ff_vp9_adapt_probs(s); 1737 ff_thread_finish_setup(avctx); 1738 } 1739 } while (s->pass++ == 1); 1740 1741 if (s->td->error_info < 0) { 1742 av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n"); 1743 s->td->error_info = 0; 1744 ret = AVERROR_INVALIDDATA; 1745 goto fail; 1746 } 1747 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) { 1748 ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]); 1749 if (ret < 0) 1750 goto fail; 1751 } 1752 1753 finish: 1754 ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX); 1755 // ref frame setup 1756 for (int i = 0; i < 8; i++) 1757 ff_progress_frame_replace(&s->s.refs[i], &s->next_refs[i]); 1758 1759 if (!s->s.h.invisible) { 1760 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0) 1761 return ret; 1762 *got_frame = 1; 1763 } 1764 1765 return pkt->size; 1766 fail: 1767 ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX); 1768 return ret; 1769 } 1770 1771 static void vp9_decode_flush(AVCodecContext *avctx) 1772 { 1773 VP9Context *s = avctx->priv_data; 1774 int i; 1775 1776 for (i = 0; i < 3; i++) 1777 vp9_frame_unref(&s->s.frames[i]); 1778 for (i = 0; i < 8; i++) 1779 ff_progress_frame_unref(&s->s.refs[i]); 1780 1781 if (FF_HW_HAS_CB(avctx, flush)) 1782 FF_HW_SIMPLE_CALL(avctx, flush); 1783 } 1784 1785 static av_cold int vp9_decode_init(AVCodecContext *avctx) 1786 { 1787 VP9Context *s = avctx->priv_data; 1788 int ret; 1789 1790 s->last_bpp = 0; 1791 
s->s.h.filter.sharpness = -1; 1792 1793 #if HAVE_THREADS 1794 if (avctx->active_thread_type & FF_THREAD_SLICE) { 1795 ret = ff_pthread_init(s, vp9_context_offsets); 1796 if (ret < 0) 1797 return ret; 1798 } 1799 #endif 1800 1801 return 0; 1802 } 1803 1804 #if HAVE_THREADS 1805 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) 1806 { 1807 VP9Context *s = dst->priv_data, *ssrc = src->priv_data; 1808 1809 for (int i = 0; i < 3; i++) 1810 vp9_frame_replace(&s->s.frames[i], &ssrc->s.frames[i]); 1811 for (int i = 0; i < 8; i++) 1812 ff_progress_frame_replace(&s->s.refs[i], &ssrc->next_refs[i]); 1813 av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool); 1814 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size; 1815 1816 s->s.h.invisible = ssrc->s.h.invisible; 1817 s->s.h.keyframe = ssrc->s.h.keyframe; 1818 s->s.h.intraonly = ssrc->s.h.intraonly; 1819 s->ss_v = ssrc->ss_v; 1820 s->ss_h = ssrc->ss_h; 1821 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled; 1822 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map; 1823 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals; 1824 s->bytesperpixel = ssrc->bytesperpixel; 1825 s->gf_fmt = ssrc->gf_fmt; 1826 s->w = ssrc->w; 1827 s->h = ssrc->h; 1828 s->s.h.bpp = ssrc->s.h.bpp; 1829 s->bpp_index = ssrc->bpp_index; 1830 s->pix_fmt = ssrc->pix_fmt; 1831 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx)); 1832 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta)); 1833 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat, 1834 sizeof(s->s.h.segmentation.feat)); 1835 1836 return 0; 1837 } 1838 #endif 1839 1840 const FFCodec ff_vp9_decoder = { 1841 .p.name = "vp9", 1842 CODEC_LONG_NAME("Google VP9"), 1843 .p.type = AVMEDIA_TYPE_VIDEO, 1844 .p.id = AV_CODEC_ID_VP9, 1845 .priv_data_size = sizeof(VP9Context), 1846 .init = vp9_decode_init, 1847 .close = vp9_decode_free, 1848 
FF_CODEC_DECODE_CB(vp9_decode_frame), 1849 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, 1850 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | 1851 FF_CODEC_CAP_SLICE_THREAD_HAS_MF | 1852 FF_CODEC_CAP_USES_PROGRESSFRAMES, 1853 .flush = vp9_decode_flush, 1854 UPDATE_THREAD_CONTEXT(vp9_decode_update_thread_context), 1855 .p.profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles), 1856 .bsfs = "vp9_superframe_split", 1857 .hw_configs = (const AVCodecHWConfigInternal *const []) { 1858 #if CONFIG_VP9_DXVA2_HWACCEL 1859 HWACCEL_DXVA2(vp9), 1860 #endif 1861 #if CONFIG_VP9_D3D11VA_HWACCEL 1862 HWACCEL_D3D11VA(vp9), 1863 #endif 1864 #if CONFIG_VP9_D3D11VA2_HWACCEL 1865 HWACCEL_D3D11VA2(vp9), 1866 #endif 1867 #if CONFIG_VP9_D3D12VA_HWACCEL 1868 HWACCEL_D3D12VA(vp9), 1869 #endif 1870 #if CONFIG_VP9_NVDEC_HWACCEL 1871 HWACCEL_NVDEC(vp9), 1872 #endif 1873 #if CONFIG_VP9_VAAPI_HWACCEL 1874 HWACCEL_VAAPI(vp9), 1875 #endif 1876 #if CONFIG_VP9_VDPAU_HWACCEL 1877 HWACCEL_VDPAU(vp9), 1878 #endif 1879 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL 1880 HWACCEL_VIDEOTOOLBOX(vp9), 1881 #endif 1882 NULL 1883 }, 1884 };