decode.c (162202B)
1 /* 2 * Copyright © 2018-2021, VideoLAN and dav1d authors 3 * Copyright © 2018, Two Orioles, LLC 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 30 #include <errno.h> 31 #include <limits.h> 32 #include <string.h> 33 #include <stdio.h> 34 #include <inttypes.h> 35 36 #include "dav1d/data.h" 37 38 #include "common/frame.h" 39 #include "common/intops.h" 40 41 #include "src/ctx.h" 42 #include "src/decode.h" 43 #include "src/dequant_tables.h" 44 #include "src/env.h" 45 #include "src/filmgrain.h" 46 #include "src/log.h" 47 #include "src/qm.h" 48 #include "src/recon.h" 49 #include "src/ref.h" 50 #include "src/tables.h" 51 #include "src/thread_task.h" 52 #include "src/warpmv.h" 53 54 static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr, 55 const Dav1dFrameHeader *const frame_hdr, 56 const int qidx, uint16_t (*dq)[3][2]) 57 { 58 for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) { 59 const int yac = frame_hdr->segmentation.enabled ? 60 iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx; 61 const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta); 62 const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta); 63 const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta); 64 const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta); 65 const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta); 66 67 dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0]; 68 dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1]; 69 dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0]; 70 dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1]; 71 dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0]; 72 dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1]; 73 } 74 } 75 76 static int read_mv_component_diff(MsacContext *const msac, 77 CdfMvComponent *const mv_comp, 78 const int mv_prec) 79 { 80 const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign); 81 const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10); 82 int up, fp = 3, hp = 1; 83 84 if (!cl) { 85 up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0); 86 if (mv_prec >= 0) { // !force_integer_mv 87 fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3); 88 if (mv_prec > 0) // allow_high_precision_mv 89 hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp); 90 } 91 } else { 92 up = 1 << cl; 93 for (int n = 0; n < cl; n++) 94 up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n; 95 if (mv_prec >= 0) { // !force_integer_mv 96 fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3); 97 if (mv_prec > 0) // allow_high_precision_mv 98 hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp); 99 } 100 } 101 102 const int diff = ((up << 3) | (fp << 1) | hp) + 1; 103 104 return sign ? -diff : diff; 105 } 106 107 static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv, 108 const int mv_prec) 109 { 110 MsacContext *const msac = &ts->msac; 111 const enum MVJoint mv_joint = 112 dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1); 113 if (mv_joint & MV_JOINT_V) 114 ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec); 115 if (mv_joint & MV_JOINT_H) 116 ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec); 117 } 118 119 static void read_tx_tree(Dav1dTaskContext *const t, 120 const enum RectTxfmSize from, 121 const int depth, uint16_t *const masks, 122 const int x_off, const int y_off) 123 { 124 const Dav1dFrameContext *const f = t->f; 125 const int bx4 = t->bx & 31, by4 = t->by & 31; 126 const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from]; 127 const int txw = t_dim->lw, txh = t_dim->lh; 128 int is_split; 129 130 if (depth < 2 && from > (int) TX_4X4) { 131 const int cat = 2 * (TX_64X64 - t_dim->max) - depth; 132 const int a = t->a->tx[bx4] < txw; 133 const int l = t->l.tx[by4] < txh; 134 135 is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac, 136 t->ts->cdf.m.txpart[cat][a + l]); 137 if (is_split) 138 masks[depth] |= 1 << (y_off * 4 + x_off); 139 } else { 140 is_split = 0; 141 } 142 143 if (is_split && t_dim->max > TX_8X8) { 144 const enum RectTxfmSize sub = t_dim->sub; 145 const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub]; 146 const int txsw = sub_t_dim->w, txsh = sub_t_dim->h; 147 148 read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0); 149 t->bx += txsw; 150 if (txw >= txh && t->bx < f->bw) 151 read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0); 152 t->bx -= txsw; 153 t->by += txsh; 154 if (txh >= txw && t->by < f->bh) { 155 read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1); 156 t->bx += txsw; 157 if (txw >= txh && t->bx < f->bw) 158 read_tx_tree(t, sub, depth + 1, masks, 159 x_off * 2 + 1, y_off * 2 + 1); 160 t->bx -= txsw; 161 } 162 t->by -= txsh; 163 } else { 164 dav1d_memset_pow2[t_dim->lw](&t->a->tx[bx4], is_split ? TX_4X4 : txw); 165 dav1d_memset_pow2[t_dim->lh](&t->l.tx[by4], is_split ? TX_4X4 : txh); 166 } 167 } 168 169 static int neg_deinterleave(int diff, int ref, int max) { 170 if (!ref) return diff; 171 if (ref >= (max - 1)) return max - diff - 1; 172 if (2 * ref < max) { 173 if (diff <= 2 * ref) { 174 if (diff & 1) 175 return ref + ((diff + 1) >> 1); 176 else 177 return ref - (diff >> 1); 178 } 179 return diff; 180 } else { 181 if (diff <= 2 * (max - ref - 1)) { 182 if (diff & 1) 183 return ref + ((diff + 1) >> 1); 184 else 185 return ref - (diff >> 1); 186 } 187 return max - (diff + 1); 188 } 189 } 190 191 static void find_matching_ref(const Dav1dTaskContext *const t, 192 const enum EdgeFlags intra_edge_flags, 193 const int bw4, const int bh4, 194 const int w4, const int h4, 195 const int have_left, const int have_top, 196 const int ref, uint64_t masks[2]) 197 { 198 /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5]; 199 int count = 0; 200 int have_topleft = have_top && have_left; 201 int have_topright = imax(bw4, bh4) < 32 && 202 have_top && t->bx + bw4 < t->ts->tiling.col_end && 203 (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT); 204 205 #define bs(rp) dav1d_block_dimensions[(rp)->bs] 206 #define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1) 207 208 if (have_top) { 209 const refmvs_block *r2 = &r[-1][t->bx]; 210 if (matches(r2)) { 211 masks[0] |= 1; 212 count = 1; 213 } 214 int aw4 = bs(r2)[0]; 215 if (aw4 >= bw4) { 216 const int off = t->bx & (aw4 - 1); 217 if (off) have_topleft = 0; 218 if (aw4 - off > bw4) have_topright = 0; 219 } else { 220 unsigned mask = 1 << aw4; 221 for (int x = aw4; x < w4; x += aw4) { 222 r2 += aw4; 223 if (matches(r2)) { 224 masks[0] |= mask; 225 if (++count >= 8) return; 226 } 227 aw4 = bs(r2)[0]; 228 mask <<= aw4; 229 } 230 } 231 } 232 if (have_left) { 233 /*const*/ refmvs_block *const *r2 = r; 234 if (matches(&r2[0][t->bx - 1])) { 235 masks[1] |= 1; 236 if (++count >= 8) return; 237 } 238 int lh4 = bs(&r2[0][t->bx - 1])[1]; 239 if (lh4 >= bh4) { 240 if (t->by & (lh4 - 1)) have_topleft = 0; 241 } else { 242 unsigned mask = 1 << lh4; 243 for (int y = lh4; y < h4; y += lh4) { 244 r2 += lh4; 245 if (matches(&r2[0][t->bx - 1])) { 246 masks[1] |= mask; 247 if (++count >= 8) return; 248 } 249 lh4 = bs(&r2[0][t->bx - 1])[1]; 250 mask <<= lh4; 251 } 252 } 253 } 254 if (have_topleft && matches(&r[-1][t->bx - 1])) { 255 masks[1] |= 1ULL << 32; 256 if (++count >= 8) return; 257 } 258 if (have_topright && matches(&r[-1][t->bx + bw4])) { 259 masks[0] |= 1ULL << 32; 260 } 261 #undef matches 262 } 263 264 static void derive_warpmv(const Dav1dTaskContext *const t, 265 const int bw4, const int bh4, 266 const uint64_t masks[2], const union mv mv, 267 Dav1dWarpedMotionParams *const wmp) 268 { 269 int pts[8][2 /* in, out */][2 /* x, y */], np = 0; 270 /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5]; 271 272 #define add_sample(dx, dy, sx, sy, rp) do { \ 273 pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \ 274 pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \ 275 pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \ 276 pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \ 277 np++; \ 278 } while (0) 279 280 // use masks[] to find the projectable motion vectors in the edges 281 if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) { 282 const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1); 283 add_sample(-off, 0, 1, -1, &r[-1][t->bx]); 284 } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top 285 const int tz = ctz(xmask); 286 off += tz; 287 xmask >>= tz; 288 add_sample(off, 0, 1, -1, &r[-1][t->bx + off]); 289 xmask &= ~1; 290 } 291 if (np < 8 && masks[1] == 1) { 292 const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1); 293 add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]); 294 } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left 295 const int tz = ctz(ymask); 296 off += tz; 297 ymask >>= tz; 298 add_sample(0, off, -1, 1, &r[off][t->bx - 1]); 299 ymask &= ~1; 300 } 301 if (np < 8 && masks[1] >> 32) // top/left 302 add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]); 303 if (np < 8 && masks[0] >> 32) // top/right 304 add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]); 305 assert(np > 0 && np <= 8); 306 #undef bs 307 308 // select according to motion vector difference against a threshold 309 int mvd[8], ret = 0; 310 const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28); 311 for (int i = 0; i < np; i++) { 312 mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) + 313 abs(pts[i][1][1] - pts[i][0][1] - mv.y); 314 if (mvd[i] > thresh) 315 mvd[i] = -1; 316 else 317 ret++; 318 } 319 if (!ret) { 320 ret = 1; 321 } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) { 322 while (mvd[i] != -1) i++; 323 while (mvd[j] == -1) j--; 324 assert(i != j); 325 if (i > j) break; 326 // replace the discarded samples; 327 mvd[i] = mvd[j]; 328 memcpy(pts[i], pts[j], sizeof(*pts)); 329 } 330 331 if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) && 332 !dav1d_get_shear_params(wmp)) 333 { 334 wmp->type = DAV1D_WM_TYPE_AFFINE; 335 } else 336 wmp->type = DAV1D_WM_TYPE_IDENTITY; 337 } 338 339 static inline int findoddzero(const uint8_t *buf, int len) { 340 for (int n = 0; n < len; n++) 341 if (!buf[n * 2]) return 1; 342 return 0; 343 } 344 345 // meant to be SIMD'able, so that theoretical complexity of this function 346 // times block size goes from w4*h4 to w4+h4-1 347 // a and b are previous two lines containing (a) top/left entries or (b) 348 // top/left entries, with a[0] being either the first top or first left entry, 349 // depending on top_offset being 1 or 0, and b being the first top/left entry 350 // for whichever has one. left_offset indicates whether the (len-1)th entry 351 // has a left neighbour. 352 // output is order[] and ctx for each member of this diagonal. 353 static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride, 354 const int i, const int first, const int last, 355 uint8_t (*const order)[8], uint8_t *const ctx) 356 { 357 int have_top = i > first; 358 359 assert(pal_idx); 360 pal_idx += first + (i - first) * stride; 361 for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) { 362 const int have_left = j > 0; 363 364 assert(have_left || have_top); 365 366 #define add(v_in) do { \ 367 const int v = v_in; \ 368 assert((unsigned)v < 8U); \ 369 order[n][o_idx++] = v; \ 370 mask |= 1 << v; \ 371 } while (0) 372 373 unsigned mask = 0; 374 int o_idx = 0; 375 if (!have_left) { 376 ctx[n] = 0; 377 add(pal_idx[-stride]); 378 } else if (!have_top) { 379 ctx[n] = 0; 380 add(pal_idx[-1]); 381 } else { 382 const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)]; 383 const int same_t_l = t == l; 384 const int same_t_tl = t == tl; 385 const int same_l_tl = l == tl; 386 const int same_all = same_t_l & same_t_tl & same_l_tl; 387 388 if (same_all) { 389 ctx[n] = 4; 390 add(t); 391 } else if (same_t_l) { 392 ctx[n] = 3; 393 add(t); 394 add(tl); 395 } else if (same_t_tl | same_l_tl) { 396 ctx[n] = 2; 397 add(tl); 398 add(same_t_tl ? l : t); 399 } else { 400 ctx[n] = 1; 401 add(imin(t, l)); 402 add(imax(t, l)); 403 add(tl); 404 } 405 } 406 for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++) 407 if (!(mask & m)) 408 order[n][o_idx++] = bit; 409 assert(o_idx == 8); 410 #undef add 411 } 412 } 413 414 static void read_pal_indices(Dav1dTaskContext *const t, 415 uint8_t *const pal_idx, 416 const int pal_sz, const int pl, 417 const int w4, const int h4, 418 const int bw4, const int bh4) 419 { 420 Dav1dTileState *const ts = t->ts; 421 const ptrdiff_t stride = bw4 * 4; 422 assert(pal_idx); 423 uint8_t *const pal_tmp = t->scratch.pal_idx_uv; 424 pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, pal_sz); 425 uint16_t (*const color_map_cdf)[8] = 426 ts->cdf.m.color_map[pl][pal_sz - 2]; 427 uint8_t (*const order)[8] = t->scratch.pal_order; 428 uint8_t *const ctx = t->scratch.pal_ctx; 429 for (int i = 1; i < 4 * (w4 + h4) - 1; i++) { 430 // top/left-to-bottom/right diagonals ("wave-front") 431 const int first = imin(i, w4 * 4 - 1); 432 const int last = imax(0, i - h4 * 4 + 1); 433 order_palette(pal_tmp, stride, i, first, last, order, ctx); 434 for (int j = first, m = 0; j >= last; j--, m++) { 435 const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac, 436 color_map_cdf[ctx[m]], pal_sz - 1); 437 pal_tmp[(i - j) * stride + j] = order[m][color_idx]; 438 } 439 } 440 441 t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4, 442 w4 * 4, h4 * 4); 443 } 444 445 static void read_vartx_tree(Dav1dTaskContext *const t, 446 Av1Block *const b, const enum BlockSize bs, 447 const int bx4, const int by4) 448 { 449 const Dav1dFrameContext *const f = t->f; 450 const uint8_t *const b_dim = dav1d_block_dimensions[bs]; 451 const int bw4 = b_dim[0], bh4 = b_dim[1]; 452 453 // var-tx tree coding 454 uint16_t tx_split[2] = { 0 }; 455 b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0]; 456 if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] || 457 b->max_ytx == TX_4X4)) 458 { 459 b->max_ytx = b->uvtx = TX_4X4; 460 if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) { 461 dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], TX_4X4); 462 dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], TX_4X4); 463 } 464 } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) { 465 if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) { 466 dav1d_memset_pow2[b_dim[2]](&t->a->tx[bx4], b_dim[2 + 0]); 467 dav1d_memset_pow2[b_dim[3]](&t->l.tx[by4], b_dim[2 + 1]); 468 } 469 b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; 470 } else { 471 assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64); 472 int y, x, y_off, x_off; 473 const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx]; 474 for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) { 475 for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) { 476 read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off); 477 // contexts are updated inside read_tx_tree() 478 t->bx += ytx->w; 479 } 480 t->bx -= x; 481 t->by += ytx->h; 482 } 483 t->by -= y; 484 if (DEBUG_BLOCK_INFO) 485 printf("Post-vartxtree[%x/%x]: r=%d\n", 486 tx_split[0], tx_split[1], t->ts->msac.rng); 487 b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; 488 } 489 assert(!(tx_split[0] & ~0x33)); 490 b->tx_split0 = (uint8_t)tx_split[0]; 491 b->tx_split1 = tx_split[1]; 492 } 493 494 static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f, 495 const int by, const int bx, 496 const int w4, int h4, 497 const uint8_t *ref_seg_map, 498 const ptrdiff_t stride) 499 { 500 assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE); 501 502 unsigned seg_id = 8; 503 ref_seg_map += by * stride + bx; 504 do { 505 for (int x = 0; x < w4; x++) 506 seg_id = imin(seg_id, ref_seg_map[x]); 507 ref_seg_map += stride; 508 } while (--h4 > 0 && seg_id); 509 assert(seg_id < 8); 510 511 return seg_id; 512 } 513 514 static inline void splat_oneref_mv(const Dav1dContext *const c, 515 Dav1dTaskContext *const t, 516 const enum BlockSize bs, 517 const Av1Block *const b, 518 const int bw4, const int bh4) 519 { 520 const enum InterPredMode mode = b->inter_mode; 521 const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { 522 .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 }, 523 .mv.mv[0] = b->mv[0], 524 .bs = bs, 525 .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2), 526 }; 527 c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); 528 } 529 530 static inline void splat_intrabc_mv(const Dav1dContext *const c, 531 Dav1dTaskContext *const t, 532 const enum BlockSize bs, 533 const Av1Block *const b, 534 const int bw4, const int bh4) 535 { 536 const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { 537 .ref.ref = { 0, -1 }, 538 .mv.mv[0] = b->mv[0], 539 .bs = bs, 540 .mf = 0, 541 }; 542 c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); 543 } 544 545 static inline void splat_tworef_mv(const Dav1dContext *const c, 546 Dav1dTaskContext *const t, 547 const enum BlockSize bs, 548 const Av1Block *const b, 549 const int bw4, const int bh4) 550 { 551 assert(bw4 >= 2 && bh4 >= 2); 552 const enum CompInterPredMode mode = b->inter_mode; 553 const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { 554 .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 }, 555 .mv.mv = { b->mv[0], b->mv[1] }, 556 .bs = bs, 557 .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2, 558 }; 559 c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); 560 } 561 562 static inline void splat_intraref(const Dav1dContext *const c, 563 Dav1dTaskContext *const t, 564 const enum BlockSize bs, 565 const int bw4, const int bh4) 566 { 567 const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) { 568 .ref.ref = { 0, -1 }, 569 .mv.mv[0].n = INVALID_MV, 570 .bs = bs, 571 .mf = 0, 572 }; 573 c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4); 574 } 575 576 static void mc_lowest_px(int *const dst, const int by4, const int bh4, 577 const int mvy, const int ss_ver, 578 const struct ScalableMotionParams *const smp) 579 { 580 const int v_mul = 4 >> ss_ver; 581 if (!smp->scale) { 582 const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver); 583 *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy); 584 } else { 585 int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver); 586 const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8; 587 y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32; 588 const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4; 589 *dst = imax(*dst, bottom); 590 } 591 } 592 593 static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst, 594 const uint8_t *const b_dim, 595 const Dav1dWarpedMotionParams *const wmp, 596 const int ss_ver, const int ss_hor) 597 { 598 const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver; 599 assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7)); 600 const int32_t *const mat = wmp->matrix; 601 const int y = b_dim[1] * v_mul - 8; // lowest y 602 603 const int src_y = t->by * 4 + ((y + 4) << ss_ver); 604 const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1]; 605 // check left- and right-most blocks 606 for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) { 607 // calculate transformation relative to center of 8x8 block in 608 // luma pixel units 609 const int src_x = t->bx * 4 + ((x + 4) << ss_hor); 610 const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver; 611 const int dy = (int) (mvy >> 16) - 4; 612 *dst = imax(*dst, dy + 4 + 8); 613 } 614 } 615 616 static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst, 617 const uint8_t *const b_dim, 618 const Dav1dWarpedMotionParams *const wmp) 619 { 620 affine_lowest_px(t, dst, b_dim, wmp, 0, 0); 621 } 622 623 static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst, 624 const uint8_t *const b_dim, 625 const Dav1dWarpedMotionParams *const wmp) 626 { 627 const Dav1dFrameContext *const f = t->f; 628 assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400); 629 if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444) 630 affine_lowest_px_luma(t, dst, b_dim, wmp); 631 else 632 affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1); 633 } 634 635 static void obmc_lowest_px(Dav1dTaskContext *const t, 636 int (*const dst)[2], const int is_chroma, 637 const uint8_t *const b_dim, 638 const int bx4, const int by4, const int w4, const int h4) 639 { 640 assert(!(t->bx & 1) && !(t->by & 1)); 641 const Dav1dFrameContext *const f = t->f; 642 /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5]; 643 const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 644 const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 645 const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver; 646 647 if (t->by > t->ts->tiling.row_start && 648 (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16)) 649 { 650 for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) { 651 // only odd blocks are considered for overlap handling, hence +1 652 const refmvs_block *const a_r = &r[-1][t->bx + x + 1]; 653 const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs]; 654 655 if (a_r->ref.ref[0] > 0) { 656 const int oh4 = imin(b_dim[1], 16) >> 1; 657 mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by, 658 (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver, 659 &f->svc[a_r->ref.ref[0] - 1][1]); 660 i++; 661 } 662 x += imax(a_b_dim[0], 2); 663 } 664 } 665 666 if (t->bx > t->ts->tiling.col_start) 667 for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) { 668 // only odd blocks are considered for overlap handling, hence +1 669 const refmvs_block *const l_r = &r[y + 1][t->bx - 1]; 670 const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs]; 671 672 if (l_r->ref.ref[0] > 0) { 673 const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]); 674 mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma], 675 t->by + y, oh4, l_r->mv.mv[0].y, ss_ver, 676 &f->svc[l_r->ref.ref[0] - 1][1]); 677 i++; 678 } 679 y += imax(l_b_dim[1], 2); 680 } 681 } 682 683 static int decode_b(Dav1dTaskContext *const t, 684 const enum BlockLevel bl, 685 const enum BlockSize bs, 686 const enum BlockPartition bp, 687 const enum EdgeFlags intra_edge_flags) { 688 Dav1dTileState *const ts = t->ts; 689 const Dav1dFrameContext *const f = t->f; 690 Av1Block b_mem, *const b = t->frame_thread.pass ? 691 &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem; 692 const uint8_t *const b_dim = dav1d_block_dimensions[bs]; 693 const int bx4 = t->bx & 31, by4 = t->by & 31; 694 const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 695 const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 696 const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver; 697 const int bw4 = b_dim[0], bh4 = b_dim[1]; 698 const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); 699 const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver; 700 const int have_left = t->bx > ts->tiling.col_start; 701 const int have_top = t->by > ts->tiling.row_start; 702 const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 && 703 (bw4 > ss_hor || t->bx & 1) && 704 (bh4 > ss_ver || t->by & 1); 705 706 if (t->frame_thread.pass == 2) { 707 if (b->intra) { 708 f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b); 709 710 const enum IntraPredMode y_mode_nofilt = 711 b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode; 712 #define set_ctx(rep_macro) \ 713 rep_macro(edge->mode, off, y_mode_nofilt); \ 714 rep_macro(edge->intra, off, 1) 715 BlockContext *edge = t->a; 716 for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { 717 case_set(b_dim[2 + i]); 718 } 719 #undef set_ctx 720 if (IS_INTER_OR_SWITCH(f->frame_hdr)) { 721 refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx]; 722 for (int x = 0; x < bw4; x++) { 723 r[x].ref.ref[0] = 0; 724 r[x].bs = bs; 725 } 726 refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5]; 727 for (int y = 0; y < bh4 - 1; y++) { 728 rr[y][t->bx + bw4 - 1].ref.ref[0] = 0; 729 rr[y][t->bx + bw4 - 1].bs = bs; 730 } 731 } 732 733 if (has_chroma) { 734 uint8_t uv_mode = b->uv_mode; 735 dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode); 736 dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode); 737 } 738 } else { 739 if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ && 740 b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP) 741 { 742 if (b->matrix[0] == INT16_MIN) { 743 t->warpmv.type = DAV1D_WM_TYPE_IDENTITY; 744 } else { 745 t->warpmv.type = DAV1D_WM_TYPE_AFFINE; 746 t->warpmv.matrix[2] = b->matrix[0] + 0x10000; 747 t->warpmv.matrix[3] = b->matrix[1]; 748 t->warpmv.matrix[4] = b->matrix[2]; 749 t->warpmv.matrix[5] = b->matrix[3] + 0x10000; 750 dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv, 751 t->bx, t->by); 752 dav1d_get_shear_params(&t->warpmv); 753 #define signabs(v) v < 0 ? '-' : ' ', abs(v) 754 if (DEBUG_BLOCK_INFO) 755 printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n" 756 "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n", 757 signabs(t->warpmv.matrix[0]), 758 signabs(t->warpmv.matrix[1]), 759 signabs(t->warpmv.matrix[2]), 760 signabs(t->warpmv.matrix[3]), 761 signabs(t->warpmv.matrix[4]), 762 signabs(t->warpmv.matrix[5]), 763 signabs(t->warpmv.u.p.alpha), 764 signabs(t->warpmv.u.p.beta), 765 signabs(t->warpmv.u.p.gamma), 766 signabs(t->warpmv.u.p.delta), 767 b->mv2d.y, b->mv2d.x); 768 #undef signabs 769 } 770 } 771 if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; 772 773 const uint8_t *const filter = dav1d_filter_dir[b->filter2d]; 774 BlockContext *edge = t->a; 775 for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { 776 #define set_ctx(rep_macro) \ 777 rep_macro(edge->filter[0], off, filter[0]); \ 778 rep_macro(edge->filter[1], off, filter[1]); \ 779 rep_macro(edge->intra, off, 0) 780 case_set(b_dim[2 + i]); 781 #undef set_ctx 782 } 783 784 if (IS_INTER_OR_SWITCH(f->frame_hdr)) { 785 refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx]; 786 for (int x = 0; x < bw4; x++) { 787 r[x].ref.ref[0] = b->ref[0] + 1; 788 r[x].mv.mv[0] = b->mv[0]; 789 r[x].bs = bs; 790 } 791 refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5]; 792 for (int y = 0; y < bh4 - 1; y++) { 793 rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1; 794 rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0]; 795 rr[y][t->bx + bw4 - 1].bs = bs; 796 } 797 } 798 799 if (has_chroma) { 800 dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); 801 dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); 802 } 803 } 804 return 0; 805 } 806 807 const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver; 808 809 b->bl = bl; 810 b->bp = bp; 811 b->bs = bs; 812 813 const Dav1dSegmentationData *seg = NULL; 814 815 // segment_id (if seg_feature for skip/ref/gmv is enabled) 816 int seg_pred = 0; 817 if (f->frame_hdr->segmentation.enabled) { 818 if (!f->frame_hdr->segmentation.update_map) { 819 if (f->prev_segmap) { 820 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4, 821 f->prev_segmap, 822 f->b4_stride); 823 if (seg_id >= 8) return -1; 824 b->seg_id = seg_id; 825 } else { 826 b->seg_id = 0; 827 } 828 seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; 829 } else if (f->frame_hdr->segmentation.seg_data.preskip) { 830 if (f->frame_hdr->segmentation.temporal && 831 (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac, 832 ts->cdf.m.seg_pred[t->a->seg_pred[bx4] + 833 t->l.seg_pred[by4]]))) 834 { 835 // temporal predicted seg_id 836 if (f->prev_segmap) { 837 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, 838 w4, h4, 839 f->prev_segmap, 840 f->b4_stride); 841 if (seg_id >= 8) return -1; 842 b->seg_id = seg_id; 843 } else { 844 b->seg_id = 0; 845 } 846 } else { 847 int seg_ctx; 848 const unsigned pred_seg_id = 849 get_cur_frame_segid(t->by, t->bx, have_top, have_left, 850 &seg_ctx, f->cur_segmap, f->b4_stride); 851 const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac, 852 ts->cdf.m.seg_id[seg_ctx], 853 DAV1D_MAX_SEGMENTS - 1); 854 const unsigned last_active_seg_id = 855 f->frame_hdr->segmentation.seg_data.last_active_segid; 856 b->seg_id = neg_deinterleave(diff, pred_seg_id, 857 last_active_seg_id + 1); 858 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error? 859 if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error? 860 } 861 862 if (DEBUG_BLOCK_INFO) 863 printf("Post-segid[preskip;%d]: r=%d\n", 864 b->seg_id, ts->msac.rng); 865 866 seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; 867 } 868 } else { 869 b->seg_id = 0; 870 } 871 872 // skip_mode 873 if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) && 874 f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1) 875 { 876 const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4]; 877 b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac, 878 ts->cdf.m.skip_mode[smctx]); 879 if (DEBUG_BLOCK_INFO) 880 printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng); 881 } else { 882 b->skip_mode = 0; 883 } 884 885 // skip 886 if (b->skip_mode || (seg && seg->skip)) { 887 b->skip = 1; 888 } else { 889 const int sctx = t->a->skip[bx4] + t->l.skip[by4]; 890 b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]); 891 if (DEBUG_BLOCK_INFO) 892 printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng); 893 } 894 895 // segment_id 896 if (f->frame_hdr->segmentation.enabled && 897 f->frame_hdr->segmentation.update_map && 898 !f->frame_hdr->segmentation.seg_data.preskip) 899 { 900 if (!b->skip && f->frame_hdr->segmentation.temporal && 901 (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac, 902 ts->cdf.m.seg_pred[t->a->seg_pred[bx4] + 903 t->l.seg_pred[by4]]))) 904 { 905 // temporal predicted seg_id 906 if (f->prev_segmap) { 907 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4, 908 f->prev_segmap, 909 f->b4_stride); 910 if (seg_id >= 8) return -1; 911 b->seg_id = seg_id; 912 } else { 913 b->seg_id = 0; 914 } 915 } else { 916 int seg_ctx; 917 const unsigned pred_seg_id = 918 get_cur_frame_segid(t->by, t->bx, have_top, have_left, 919 &seg_ctx, f->cur_segmap, f->b4_stride); 920 if (b->skip) { 921 b->seg_id = pred_seg_id; 922 } else { 923 const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac, 924 ts->cdf.m.seg_id[seg_ctx], 925 DAV1D_MAX_SEGMENTS - 1); 926 const unsigned last_active_seg_id = 927 f->frame_hdr->segmentation.seg_data.last_active_segid; 928 b->seg_id = neg_deinterleave(diff, pred_seg_id, 929 last_active_seg_id + 1); 930 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error? 931 } 932 if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error? 933 } 934 935 seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id]; 936 937 if (DEBUG_BLOCK_INFO) 938 printf("Post-segid[postskip;%d]: r=%d\n", 939 b->seg_id, ts->msac.rng); 940 } 941 942 // cdef index 943 if (!b->skip) { 944 const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) + 945 ((t->by & 16) >> 3) : 0; 946 if (t->cur_sb_cdef_idx_ptr[idx] == -1) { 947 const int v = dav1d_msac_decode_bools(&ts->msac, 948 f->frame_hdr->cdef.n_bits); 949 t->cur_sb_cdef_idx_ptr[idx] = v; 950 if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v; 951 if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v; 952 if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v; 953 954 if (DEBUG_BLOCK_INFO) 955 printf("Post-cdef_idx[%d]: r=%d\n", 956 *t->cur_sb_cdef_idx_ptr, ts->msac.rng); 957 } 958 } 959 960 // delta-q/lf 961 if (!((t->bx | t->by) & (31 >> !f->seq_hdr->sb128))) { 962 const int prev_qidx = ts->last_qidx; 963 const int have_delta_q = f->frame_hdr->delta.q.present && 964 (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip); 965 966 uint32_t prev_delta_lf = ts->last_delta_lf.u32; 967 968 if (have_delta_q) { 969 int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac, 970 ts->cdf.m.delta_q, 3); 971 if (delta_q == 3) { 972 const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3); 973 delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) + 974 1 + (1 << n_bits); 975 } 976 if (delta_q) { 977 if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q; 978 delta_q *= 1 << f->frame_hdr->delta.q.res_log2; 979 } 980 ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255); 981 if (have_delta_q && DEBUG_BLOCK_INFO) 982 printf("Post-delta_q[%d->%d]: r=%d\n", 983 delta_q, ts->last_qidx, ts->msac.rng); 984 985 if (f->frame_hdr->delta.lf.present) { 986 const int n_lfs = f->frame_hdr->delta.lf.multi ? 987 f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1; 988 989 for (int i = 0; i < n_lfs; i++) { 990 int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac, 991 ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3); 992 if (delta_lf == 3) { 993 const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3); 994 delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) + 995 1 + (1 << n_bits); 996 } 997 if (delta_lf) { 998 if (dav1d_msac_decode_bool_equi(&ts->msac)) 999 delta_lf = -delta_lf; 1000 delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2; 1001 } 1002 ts->last_delta_lf.i8[i] = 1003 iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63); 1004 if (have_delta_q && DEBUG_BLOCK_INFO) 1005 printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf, 1006 ts->msac.rng); 1007 } 1008 } 1009 } 1010 if (ts->last_qidx == f->frame_hdr->quant.yac) { 1011 // assign frame-wide q values to this sb 1012 ts->dq = f->dq; 1013 } else if (ts->last_qidx != prev_qidx) { 1014 // find sb-specific quant parameters 1015 init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem); 1016 ts->dq = ts->dqmem; 1017 } 1018 if (!ts->last_delta_lf.u32) { 1019 // assign frame-wide lf values to this sb 1020 ts->lflvl = f->lf.lvl; 1021 } else if (ts->last_delta_lf.u32 != prev_delta_lf) { 1022 // find sb-specific lf lvl parameters 1023 ts->lflvl = ts->lflvlmem; 1024 dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8); 1025 } 1026 } 1027 1028 if (b->skip_mode) { 1029 b->intra = 0; 1030 } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) { 1031 if (seg && (seg->ref >= 0 || seg->globalmv)) { 1032 b->intra = !seg->ref; 1033 } else { 1034 const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4, 1035 have_top, have_left); 1036 b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, 1037 ts->cdf.m.intra[ictx]); 1038 if (DEBUG_BLOCK_INFO) 1039 printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng); 1040 } 1041 } else if (f->frame_hdr->allow_intrabc) { 1042 b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc); 1043 if (DEBUG_BLOCK_INFO) 1044 printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng); 1045 } else { 1046 b->intra = 1; 1047 } 1048 1049 // intra/inter-specific stuff 1050 if (b->intra) { 1051 uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ? 1052 ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] : 1053 ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]] 1054 [dav1d_intra_mode_context[t->l.mode[by4]]]; 1055 b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf, 1056 N_INTRA_PRED_MODES - 1); 1057 if (DEBUG_BLOCK_INFO) 1058 printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng); 1059 1060 // angle delta 1061 if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED && 1062 b->y_mode <= VERT_LEFT_PRED) 1063 { 1064 uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED]; 1065 const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6); 1066 b->y_angle = angle - 3; 1067 } else { 1068 b->y_angle = 0; 1069 } 1070 1071 if (has_chroma) { 1072 const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ? 1073 cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs)); 1074 uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode]; 1075 b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf, 1076 N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed); 1077 if (DEBUG_BLOCK_INFO) 1078 printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng); 1079 1080 b->uv_angle = 0; 1081 if (b->uv_mode == CFL_PRED) { 1082 #define SIGN(a) (!!(a) + ((a) > 0)) 1083 const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac, 1084 ts->cdf.m.cfl_sign, 7) + 1; 1085 const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3; 1086 assert(sign_u == sign / 3); 1087 if (sign_u) { 1088 const int ctx = (sign_u == 2) * 3 + sign_v; 1089 b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac, 1090 ts->cdf.m.cfl_alpha[ctx], 15) + 1; 1091 if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0]; 1092 } else { 1093 b->cfl_alpha[0] = 0; 1094 } 1095 if (sign_v) { 1096 const int ctx = (sign_v == 2) * 3 + sign_u; 1097 b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac, 1098 ts->cdf.m.cfl_alpha[ctx], 15) + 1; 1099 if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1]; 1100 } else { 1101 b->cfl_alpha[1] = 0; 1102 } 1103 #undef SIGN 1104 if (DEBUG_BLOCK_INFO) 1105 printf("Post-uvalphas[%d/%d]: r=%d\n", 1106 b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng); 1107 } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED && 1108 b->uv_mode <= VERT_LEFT_PRED) 1109 { 1110 uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED]; 1111 const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6); 1112 b->uv_angle = angle - 3; 1113 } 1114 } 1115 1116 b->pal_sz[0] = b->pal_sz[1] = 0; 1117 if (f->frame_hdr->allow_screen_content_tools && 1118 imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4) 1119 { 1120 const int sz_ctx = b_dim[2] + b_dim[3] - 2; 1121 if (b->y_mode == DC_PRED) { 1122 const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0); 1123 const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac, 1124 ts->cdf.m.pal_y[sz_ctx][pal_ctx]); 1125 if (DEBUG_BLOCK_INFO) 1126 printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng); 1127 if (use_y_pal) 1128 f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4); 1129 } 1130 1131 if (has_chroma && b->uv_mode == DC_PRED) { 1132 const int pal_ctx = b->pal_sz[0] > 0; 1133 const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac, 1134 ts->cdf.m.pal_uv[pal_ctx]); 1135 if (DEBUG_BLOCK_INFO) 1136 printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng); 1137 if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates 1138 f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4); 1139 } 1140 } 1141 1142 if (b->y_mode == DC_PRED && !b->pal_sz[0] && 1143 imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra) 1144 { 1145 const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac, 1146 ts->cdf.m.use_filter_intra[bs]); 1147 if (is_filter) { 1148 b->y_mode = FILTER_PRED; 1149 b->y_angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, 1150 ts->cdf.m.filter_intra, 4); 1151 } 1152 if (DEBUG_BLOCK_INFO) 1153 printf("Post-filterintramode[%d/%d]: r=%d\n", 1154 b->y_mode, b->y_angle, ts->msac.rng); 1155 } 1156 1157 if (b->pal_sz[0]) { 1158 uint8_t *pal_idx; 1159 if (t->frame_thread.pass) { 1160 const int p = t->frame_thread.pass & 1; 1161 assert(ts->frame_thread[p].pal_idx); 1162 pal_idx = ts->frame_thread[p].pal_idx; 1163 ts->frame_thread[p].pal_idx += bw4 * bh4 * 8; 1164 } else 1165 pal_idx = t->scratch.pal_idx_y; 1166 read_pal_indices(t, pal_idx, b->pal_sz[0], 0, w4, h4, bw4, bh4); 1167 if (DEBUG_BLOCK_INFO) 1168 printf("Post-y-pal-indices: r=%d\n", ts->msac.rng); 1169 } 1170 1171 if (has_chroma && b->pal_sz[1]) { 1172 uint8_t *pal_idx; 1173 if (t->frame_thread.pass) { 1174 const int p = t->frame_thread.pass & 1; 1175 assert(ts->frame_thread[p].pal_idx); 1176 pal_idx = ts->frame_thread[p].pal_idx; 1177 ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8; 1178 } else 1179 pal_idx = t->scratch.pal_idx_uv; 1180 read_pal_indices(t, pal_idx, b->pal_sz[1], 1, cw4, ch4, cbw4, cbh4); 1181 if (DEBUG_BLOCK_INFO) 1182 printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng); 1183 } 1184 1185 const TxfmInfo *t_dim; 1186 if (f->frame_hdr->segmentation.lossless[b->seg_id]) { 1187 b->tx = b->uvtx = (int) TX_4X4; 1188 t_dim = &dav1d_txfm_dimensions[TX_4X4]; 1189 } else { 1190 b->tx = dav1d_max_txfm_size_for_bs[bs][0]; 1191 b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout]; 1192 t_dim = &dav1d_txfm_dimensions[b->tx]; 1193 if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) { 1194 const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4); 1195 uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx]; 1196 int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf, 1197 imin(t_dim->max, 2)); 1198 1199 while (depth--) { 1200 b->tx = t_dim->sub; 1201 t_dim = &dav1d_txfm_dimensions[b->tx]; 1202 } 1203 } 1204 if (DEBUG_BLOCK_INFO) 1205 printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng); 1206 } 1207 1208 // reconstruction 1209 if (t->frame_thread.pass == 1) { 1210 f->bd_fn.read_coef_blocks(t, bs, b); 1211 } else { 1212 f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b); 1213 } 1214 1215 if (f->frame_hdr->loopfilter.level_y[0] || 1216 f->frame_hdr->loopfilter.level_y[1]) 1217 { 1218 dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride, 1219 (const uint8_t (*)[8][2]) 1220 &ts->lflvl[b->seg_id][0][0][0], 1221 t->bx, t->by, f->w4, f->h4, bs, 1222 b->tx, b->uvtx, f->cur.p.layout, 1223 &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], 1224 has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, 1225 has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); 1226 } 1227 // update contexts 1228 const enum IntraPredMode y_mode_nofilt = 1229 b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode; 1230 BlockContext *edge = t->a; 1231 for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { 1232 int t_lsz = ((uint8_t *) &t_dim->lw)[i]; // lw then lh 1233 #define set_ctx(rep_macro) \ 1234 rep_macro(edge->tx_intra, off, t_lsz); \ 1235 rep_macro(edge->tx, off, t_lsz); \ 1236 rep_macro(edge->mode, off, y_mode_nofilt); \ 1237 rep_macro(edge->pal_sz, off, b->pal_sz[0]); \ 1238 rep_macro(edge->seg_pred, off, seg_pred); \ 1239 rep_macro(edge->skip_mode, off, 0); \ 1240 rep_macro(edge->intra, off, 1); \ 1241 rep_macro(edge->skip, off, b->skip); \ 1242 /* see aomedia bug 2183 for why we use luma coordinates here */ \ 1243 rep_macro(t->pal_sz_uv[i], off, (has_chroma ? b->pal_sz[1] : 0)); \ 1244 if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \ 1245 rep_macro(edge->comp_type, off, COMP_INTER_NONE); \ 1246 rep_macro(edge->ref[0], off, ((uint8_t) -1)); \ 1247 rep_macro(edge->ref[1], off, ((uint8_t) -1)); \ 1248 rep_macro(edge->filter[0], off, DAV1D_N_SWITCHABLE_FILTERS); \ 1249 rep_macro(edge->filter[1], off, DAV1D_N_SWITCHABLE_FILTERS); \ 1250 } 1251 case_set(b_dim[2 + i]); 1252 #undef set_ctx 1253 } 1254 if (b->pal_sz[0]) 1255 f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4); 1256 if (has_chroma) { 1257 uint8_t uv_mode = b->uv_mode; 1258 dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], uv_mode); 1259 dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], uv_mode); 1260 if (b->pal_sz[1]) 1261 f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4); 1262 } 1263 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) 1264 splat_intraref(f->c, t, bs, bw4, bh4); 1265 } else if (IS_KEY_OR_INTRA(f->frame_hdr)) { 1266 // intra block copy 1267 refmvs_candidate mvstack[8]; 1268 int n_mvs, ctx; 1269 dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, 1270 (union refmvs_refpair) { .ref = { 0, -1 }}, 1271 bs, intra_edge_flags, t->by, t->bx); 1272 1273 if (mvstack[0].mv.mv[0].n) 1274 b->mv[0] = mvstack[0].mv.mv[0]; 1275 else if (mvstack[1].mv.mv[0].n) 1276 b->mv[0] = mvstack[1].mv.mv[0]; 1277 else { 1278 if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) { 1279 b->mv[0].y = 0; 1280 b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048; 1281 } else { 1282 b->mv[0].y = -(512 << f->seq_hdr->sb128); 1283 b->mv[0].x = 0; 1284 } 1285 } 1286 1287 const union mv ref = b->mv[0]; 1288 read_mv_residual(ts, &b->mv[0], -1); 1289 1290 // clip intrabc motion vector to decoded parts of current tile 1291 int border_left = ts->tiling.col_start * 4; 1292 int border_top = ts->tiling.row_start * 4; 1293 if (has_chroma) { 1294 if (bw4 < 2 && ss_hor) 1295 border_left += 4; 1296 if (bh4 < 2 && ss_ver) 1297 border_top += 4; 1298 } 1299 int src_left = t->bx * 4 + (b->mv[0].x >> 3); 1300 int src_top = t->by * 4 + (b->mv[0].y >> 3); 1301 int src_right = src_left + bw4 * 4; 1302 int src_bottom = src_top + bh4 * 4; 1303 const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4; 1304 1305 // check against left or right tile boundary and adjust if necessary 1306 if (src_left < border_left) { 1307 src_right += border_left - src_left; 1308 src_left += border_left - src_left; 1309 } else if (src_right > border_right) { 1310 src_left -= src_right - border_right; 1311 src_right -= src_right - border_right; 1312 } 1313 // check against top tile boundary and adjust if necessary 1314 if (src_top < border_top) { 1315 src_bottom += border_top - src_top; 1316 src_top += border_top - src_top; 1317 } 1318 1319 const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128); 1320 const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128); 1321 const int sb_size = 1 << (6 + f->seq_hdr->sb128); 1322 // check for overlap with current superblock 1323 if (src_bottom > sby && src_right > sbx) { 1324 if (src_top - border_top >= src_bottom - sby) { 1325 // if possible move src up into the previous suberblock row 1326 src_top -= src_bottom - sby; 1327 src_bottom -= src_bottom - sby; 1328 } else if (src_left - border_left >= src_right - sbx) { 1329 // if possible move src left into the previous suberblock 1330 src_left -= src_right - sbx; 1331 src_right -= src_right - sbx; 1332 } 1333 } 1334 // move src up if it is below current superblock row 1335 if (src_bottom > sby + sb_size) { 1336 src_top -= src_bottom - (sby + sb_size); 1337 src_bottom -= src_bottom - (sby + sb_size); 1338 } 1339 // error out if mv still overlaps with the current superblock 1340 if (src_bottom > sby && src_right > sbx) 1341 return -1; 1342 1343 b->mv[0].x = (src_left - t->bx * 4) * 8; 1344 b->mv[0].y = (src_top - t->by * 4) * 8; 1345 1346 if (DEBUG_BLOCK_INFO) 1347 printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n", 1348 b->mv[0].y, b->mv[0].x, ref.y, ref.x, 1349 mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng); 1350 read_vartx_tree(t, b, bs, bx4, by4); 1351 1352 // reconstruction 1353 if (t->frame_thread.pass == 1) { 1354 f->bd_fn.read_coef_blocks(t, bs, b); 1355 b->filter2d = FILTER_2D_BILINEAR; 1356 } else { 1357 if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; 1358 } 1359 1360 splat_intrabc_mv(f->c, t, bs, b, bw4, bh4); 1361 BlockContext *edge = t->a; 1362 for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { 1363 #define set_ctx(rep_macro) \ 1364 rep_macro(edge->tx_intra, off, b_dim[2 + i]); \ 1365 rep_macro(edge->mode, off, DC_PRED); \ 1366 rep_macro(edge->pal_sz, off, 0); \ 1367 /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \ 1368 rep_macro(t->pal_sz_uv[i], off, 0); \ 1369 rep_macro(edge->seg_pred, off, seg_pred); \ 1370 rep_macro(edge->skip_mode, off, 0); \ 1371 rep_macro(edge->intra, off, 0); \ 1372 rep_macro(edge->skip, off, b->skip) 1373 case_set(b_dim[2 + i]); 1374 #undef set_ctx 1375 } 1376 if (has_chroma) { 1377 dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); 1378 dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); 1379 } 1380 } else { 1381 // inter-specific mode/mv coding 1382 int is_comp, has_subpel_filter; 1383 1384 if (b->skip_mode) { 1385 is_comp = 1; 1386 } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) && 1387 f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1) 1388 { 1389 const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4, 1390 have_top, have_left); 1391 is_comp = dav1d_msac_decode_bool_adapt(&ts->msac, 1392 ts->cdf.m.comp[ctx]); 1393 if (DEBUG_BLOCK_INFO) 1394 printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng); 1395 } else { 1396 is_comp = 0; 1397 } 1398 1399 if (b->skip_mode) { 1400 b->ref[0] = f->frame_hdr->skip_mode_refs[0]; 1401 b->ref[1] = f->frame_hdr->skip_mode_refs[1]; 1402 b->comp_type = COMP_INTER_AVG; 1403 b->inter_mode = NEARESTMV_NEARESTMV; 1404 b->drl_idx = NEAREST_DRL; 1405 has_subpel_filter = 0; 1406 1407 refmvs_candidate mvstack[8]; 1408 int n_mvs, ctx; 1409 dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, 1410 (union refmvs_refpair) { .ref = { 1411 b->ref[0] + 1, b->ref[1] + 1 }}, 1412 bs, intra_edge_flags, t->by, t->bx); 1413 1414 b->mv[0] = mvstack[0].mv.mv[0]; 1415 b->mv[1] = mvstack[0].mv.mv[1]; 1416 fix_mv_precision(f->frame_hdr, &b->mv[0]); 1417 fix_mv_precision(f->frame_hdr, &b->mv[1]); 1418 if (DEBUG_BLOCK_INFO) 1419 printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n", 1420 b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x, 1421 b->ref[0], b->ref[1]); 1422 } else if (is_comp) { 1423 const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4, 1424 have_top, have_left); 1425 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1426 ts->cdf.m.comp_dir[dir_ctx])) 1427 { 1428 // bidir - first reference (fw) 1429 const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4, 1430 have_top, have_left); 1431 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1432 ts->cdf.m.comp_fwd_ref[0][ctx1])) 1433 { 1434 const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4, 1435 have_top, have_left); 1436 b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac, 1437 ts->cdf.m.comp_fwd_ref[2][ctx2]); 1438 } else { 1439 const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4, 1440 have_top, have_left); 1441 b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac, 1442 ts->cdf.m.comp_fwd_ref[1][ctx2]); 1443 } 1444 1445 // second reference (bw) 1446 const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4, 1447 have_top, have_left); 1448 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1449 ts->cdf.m.comp_bwd_ref[0][ctx3])) 1450 { 1451 b->ref[1] = 6; 1452 } else { 1453 const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4, 1454 have_top, have_left); 1455 b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac, 1456 ts->cdf.m.comp_bwd_ref[1][ctx4]); 1457 } 1458 } else { 1459 // unidir 1460 const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4, 1461 have_top, have_left); 1462 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1463 ts->cdf.m.comp_uni_ref[0][uctx_p])) 1464 { 1465 b->ref[0] = 4; 1466 b->ref[1] = 6; 1467 } else { 1468 const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4, 1469 have_top, have_left); 1470 b->ref[0] = 0; 1471 b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac, 1472 ts->cdf.m.comp_uni_ref[1][uctx_p1]); 1473 if (b->ref[1] == 2) { 1474 const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4, 1475 have_top, have_left); 1476 b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac, 1477 ts->cdf.m.comp_uni_ref[2][uctx_p2]); 1478 } 1479 } 1480 } 1481 if (DEBUG_BLOCK_INFO) 1482 printf("Post-refs[%d/%d]: r=%d\n", 1483 b->ref[0], b->ref[1], ts->msac.rng); 1484 1485 refmvs_candidate mvstack[8]; 1486 int n_mvs, ctx; 1487 dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, 1488 (union refmvs_refpair) { .ref = { 1489 b->ref[0] + 1, b->ref[1] + 1 }}, 1490 bs, intra_edge_flags, t->by, t->bx); 1491 1492 b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac, 1493 ts->cdf.m.comp_inter_mode[ctx], 1494 N_COMP_INTER_PRED_MODES - 1); 1495 if (DEBUG_BLOCK_INFO) 1496 printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n", 1497 b->inter_mode, ctx, n_mvs, ts->msac.rng); 1498 1499 const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode]; 1500 b->drl_idx = NEAREST_DRL; 1501 if (b->inter_mode == NEWMV_NEWMV) { 1502 if (n_mvs > 1) { // NEARER, NEAR or NEARISH 1503 const int drl_ctx_v1 = get_drl_context(mvstack, 0); 1504 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1505 ts->cdf.m.drl_bit[drl_ctx_v1]); 1506 if (b->drl_idx == NEARER_DRL && n_mvs > 2) { 1507 const int drl_ctx_v2 = get_drl_context(mvstack, 1); 1508 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1509 ts->cdf.m.drl_bit[drl_ctx_v2]); 1510 } 1511 if (DEBUG_BLOCK_INFO) 1512 printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n", 1513 b->drl_idx, n_mvs, ts->msac.rng); 1514 } 1515 } else if (im[0] == NEARMV || im[1] == NEARMV) { 1516 b->drl_idx = NEARER_DRL; 1517 if (n_mvs > 2) { // NEAR or NEARISH 1518 const int drl_ctx_v2 = get_drl_context(mvstack, 1); 1519 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1520 ts->cdf.m.drl_bit[drl_ctx_v2]); 1521 if (b->drl_idx == NEAR_DRL && n_mvs > 3) { 1522 const int drl_ctx_v3 = get_drl_context(mvstack, 2); 1523 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1524 ts->cdf.m.drl_bit[drl_ctx_v3]); 1525 } 1526 if (DEBUG_BLOCK_INFO) 1527 printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n", 1528 b->drl_idx, n_mvs, ts->msac.rng); 1529 } 1530 } 1531 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); 1532 1533 #define assign_comp_mv(idx) \ 1534 switch (im[idx]) { \ 1535 case NEARMV: \ 1536 case NEARESTMV: \ 1537 b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \ 1538 fix_mv_precision(f->frame_hdr, &b->mv[idx]); \ 1539 break; \ 1540 case GLOBALMV: \ 1541 has_subpel_filter |= \ 1542 f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \ 1543 b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \ 1544 t->bx, t->by, bw4, bh4, f->frame_hdr); \ 1545 break; \ 1546 case NEWMV: \ 1547 b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \ 1548 const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \ 1549 read_mv_residual(ts, &b->mv[idx], mv_prec); \ 1550 break; \ 1551 } 1552 has_subpel_filter = imin(bw4, bh4) == 1 || 1553 b->inter_mode != GLOBALMV_GLOBALMV; 1554 assign_comp_mv(0); 1555 assign_comp_mv(1); 1556 #undef assign_comp_mv 1557 if (DEBUG_BLOCK_INFO) 1558 printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n", 1559 b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x, 1560 ts->msac.rng); 1561 1562 // jnt_comp vs. seg vs. wedge 1563 int is_segwedge = 0; 1564 if (f->seq_hdr->masked_compound) { 1565 const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4); 1566 1567 is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac, 1568 ts->cdf.m.mask_comp[mask_ctx]); 1569 if (DEBUG_BLOCK_INFO) 1570 printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n", 1571 is_segwedge, mask_ctx, ts->msac.rng); 1572 } 1573 1574 if (!is_segwedge) { 1575 if (f->seq_hdr->jnt_comp) { 1576 const int jnt_ctx = 1577 get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits, 1578 f->cur.frame_hdr->frame_offset, 1579 f->refp[b->ref[0]].p.frame_hdr->frame_offset, 1580 f->refp[b->ref[1]].p.frame_hdr->frame_offset, 1581 t->a, &t->l, by4, bx4); 1582 b->comp_type = COMP_INTER_WEIGHTED_AVG + 1583 dav1d_msac_decode_bool_adapt(&ts->msac, 1584 ts->cdf.m.jnt_comp[jnt_ctx]); 1585 if (DEBUG_BLOCK_INFO) 1586 printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n", 1587 b->comp_type == COMP_INTER_AVG, 1588 jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4], 1589 t->l.comp_type[by4], t->l.ref[0][by4], 1590 ts->msac.rng); 1591 } else { 1592 b->comp_type = COMP_INTER_AVG; 1593 } 1594 } else { 1595 if (wedge_allowed_mask & (1 << bs)) { 1596 const int ctx = dav1d_wedge_ctx_lut[bs]; 1597 b->comp_type = COMP_INTER_WEDGE - 1598 dav1d_msac_decode_bool_adapt(&ts->msac, 1599 ts->cdf.m.wedge_comp[ctx]); 1600 if (b->comp_type == COMP_INTER_WEDGE) 1601 b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac, 1602 ts->cdf.m.wedge_idx[ctx], 15); 1603 } else { 1604 b->comp_type = COMP_INTER_SEG; 1605 } 1606 b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac); 1607 if (DEBUG_BLOCK_INFO) 1608 printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n", 1609 b->comp_type == COMP_INTER_WEDGE, 1610 b->wedge_idx, b->mask_sign, ts->msac.rng); 1611 } 1612 } else { 1613 b->comp_type = COMP_INTER_NONE; 1614 1615 // ref 1616 if (seg && seg->ref > 0) { 1617 b->ref[0] = seg->ref - 1; 1618 } else if (seg && (seg->globalmv || seg->skip)) { 1619 b->ref[0] = 0; 1620 } else { 1621 const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4, 1622 have_top, have_left); 1623 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1624 ts->cdf.m.ref[0][ctx1])) 1625 { 1626 const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4, 1627 have_top, have_left); 1628 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1629 ts->cdf.m.ref[1][ctx2])) 1630 { 1631 b->ref[0] = 6; 1632 } else { 1633 const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4, 1634 have_top, have_left); 1635 b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac, 1636 ts->cdf.m.ref[5][ctx3]); 1637 } 1638 } else { 1639 const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4, 1640 have_top, have_left); 1641 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1642 ts->cdf.m.ref[2][ctx2])) 1643 { 1644 const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4, 1645 have_top, have_left); 1646 b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac, 1647 ts->cdf.m.ref[4][ctx3]); 1648 } else { 1649 const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4, 1650 have_top, have_left); 1651 b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac, 1652 ts->cdf.m.ref[3][ctx3]); 1653 } 1654 } 1655 if (DEBUG_BLOCK_INFO) 1656 printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng); 1657 } 1658 b->ref[1] = -1; 1659 1660 refmvs_candidate mvstack[8]; 1661 int n_mvs, ctx; 1662 dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx, 1663 (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }}, 1664 bs, intra_edge_flags, t->by, t->bx); 1665 1666 // mode parsing and mv derivation from ref_mvs 1667 if ((seg && (seg->skip || seg->globalmv)) || 1668 dav1d_msac_decode_bool_adapt(&ts->msac, 1669 ts->cdf.m.newmv_mode[ctx & 7])) 1670 { 1671 if ((seg && (seg->skip || seg->globalmv)) || 1672 !dav1d_msac_decode_bool_adapt(&ts->msac, 1673 ts->cdf.m.globalmv_mode[(ctx >> 3) & 1])) 1674 { 1675 b->inter_mode = GLOBALMV; 1676 b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]], 1677 t->bx, t->by, bw4, bh4, f->frame_hdr); 1678 has_subpel_filter = imin(bw4, bh4) == 1 || 1679 f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION; 1680 } else { 1681 has_subpel_filter = 1; 1682 if (dav1d_msac_decode_bool_adapt(&ts->msac, 1683 ts->cdf.m.refmv_mode[(ctx >> 4) & 15])) 1684 { // NEAREST, NEARER, NEAR or NEARISH 1685 b->inter_mode = NEARMV; 1686 b->drl_idx = NEARER_DRL; 1687 if (n_mvs > 2) { // NEARER, NEAR or NEARISH 1688 const int drl_ctx_v2 = get_drl_context(mvstack, 1); 1689 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1690 ts->cdf.m.drl_bit[drl_ctx_v2]); 1691 if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH 1692 const int drl_ctx_v3 = 1693 get_drl_context(mvstack, 2); 1694 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1695 ts->cdf.m.drl_bit[drl_ctx_v3]); 1696 } 1697 } 1698 } else { 1699 b->inter_mode = NEARESTMV; 1700 b->drl_idx = NEAREST_DRL; 1701 } 1702 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); 1703 b->mv[0] = mvstack[b->drl_idx].mv.mv[0]; 1704 if (b->drl_idx < NEAR_DRL) 1705 fix_mv_precision(f->frame_hdr, &b->mv[0]); 1706 } 1707 1708 if (DEBUG_BLOCK_INFO) 1709 printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n", 1710 b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs, 1711 ts->msac.rng); 1712 } else { 1713 has_subpel_filter = 1; 1714 b->inter_mode = NEWMV; 1715 b->drl_idx = NEAREST_DRL; 1716 if (n_mvs > 1) { // NEARER, NEAR or NEARISH 1717 const int drl_ctx_v1 = get_drl_context(mvstack, 0); 1718 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1719 ts->cdf.m.drl_bit[drl_ctx_v1]); 1720 if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH 1721 const int drl_ctx_v2 = get_drl_context(mvstack, 1); 1722 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac, 1723 ts->cdf.m.drl_bit[drl_ctx_v2]); 1724 } 1725 } 1726 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL); 1727 if (n_mvs > 1) { 1728 b->mv[0] = mvstack[b->drl_idx].mv.mv[0]; 1729 } else { 1730 assert(!b->drl_idx); 1731 b->mv[0] = mvstack[0].mv.mv[0]; 1732 fix_mv_precision(f->frame_hdr, &b->mv[0]); 1733 } 1734 if (DEBUG_BLOCK_INFO) 1735 printf("Post-intermode[%d,drl=%d]: r=%d\n", 1736 b->inter_mode, b->drl_idx, ts->msac.rng); 1737 const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; 1738 read_mv_residual(ts, &b->mv[0], mv_prec); 1739 if (DEBUG_BLOCK_INFO) 1740 printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n", 1741 b->mv[0].y, b->mv[0].x, ts->msac.rng); 1742 } 1743 1744 // interintra flags 1745 const int ii_sz_grp = dav1d_ymode_size_context[bs]; 1746 if (f->seq_hdr->inter_intra && 1747 interintra_allowed_mask & (1 << bs) && 1748 dav1d_msac_decode_bool_adapt(&ts->msac, 1749 ts->cdf.m.interintra[ii_sz_grp])) 1750 { 1751 b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac, 1752 ts->cdf.m.interintra_mode[ii_sz_grp], 1753 N_INTER_INTRA_PRED_MODES - 1); 1754 const int wedge_ctx = dav1d_wedge_ctx_lut[bs]; 1755 b->interintra_type = INTER_INTRA_BLEND + 1756 dav1d_msac_decode_bool_adapt(&ts->msac, 1757 ts->cdf.m.interintra_wedge[wedge_ctx]); 1758 if (b->interintra_type == INTER_INTRA_WEDGE) 1759 b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac, 1760 ts->cdf.m.wedge_idx[wedge_ctx], 15); 1761 } else { 1762 b->interintra_type = INTER_INTRA_NONE; 1763 } 1764 if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra && 1765 interintra_allowed_mask & (1 << bs)) 1766 { 1767 printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n", 1768 b->interintra_type, b->interintra_mode, 1769 b->wedge_idx, ts->msac.rng); 1770 } 1771 1772 // motion variation 1773 if (f->frame_hdr->switchable_motion_mode && 1774 b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 && 1775 // is not warped global motion 1776 !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV && 1777 f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) && 1778 // has overlappable neighbours 1779 ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) || 1780 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1)))) 1781 { 1782 // reaching here means the block allows obmc - check warp by 1783 // finding matching-ref blocks in top/left edges 1784 uint64_t mask[2] = { 0, 0 }; 1785 find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4, 1786 have_left, have_top, b->ref[0], mask); 1787 const int allow_warp = !f->svc[b->ref[0]][0].scale && 1788 !f->frame_hdr->force_integer_mv && 1789 f->frame_hdr->warp_motion && (mask[0] | mask[1]); 1790 1791 b->motion_mode = allow_warp ? 1792 dav1d_msac_decode_symbol_adapt4(&ts->msac, 1793 ts->cdf.m.motion_mode[bs], 2) : 1794 dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]); 1795 if (b->motion_mode == MM_WARP) { 1796 has_subpel_filter = 0; 1797 derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv); 1798 #define signabs(v) v < 0 ? '-' : ' ', abs(v) 1799 if (DEBUG_BLOCK_INFO) 1800 printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n" 1801 "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, " 1802 "mv=y:%d,x:%d\n", 1803 signabs(t->warpmv.matrix[0]), 1804 signabs(t->warpmv.matrix[1]), 1805 signabs(t->warpmv.matrix[2]), 1806 signabs(t->warpmv.matrix[3]), 1807 signabs(t->warpmv.matrix[4]), 1808 signabs(t->warpmv.matrix[5]), 1809 signabs(t->warpmv.u.p.alpha), 1810 signabs(t->warpmv.u.p.beta), 1811 signabs(t->warpmv.u.p.gamma), 1812 signabs(t->warpmv.u.p.delta), 1813 b->mv[0].y, b->mv[0].x); 1814 #undef signabs 1815 if (t->frame_thread.pass) { 1816 if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) { 1817 b->matrix[0] = t->warpmv.matrix[2] - 0x10000; 1818 b->matrix[1] = t->warpmv.matrix[3]; 1819 b->matrix[2] = t->warpmv.matrix[4]; 1820 b->matrix[3] = t->warpmv.matrix[5] - 0x10000; 1821 } else { 1822 b->matrix[0] = INT16_MIN; 1823 } 1824 } 1825 } 1826 1827 if (DEBUG_BLOCK_INFO) 1828 printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%" 1829 PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0], 1830 mask[1]); 1831 } else { 1832 b->motion_mode = MM_TRANSLATION; 1833 } 1834 } 1835 1836 // subpel filter 1837 enum Dav1dFilterMode filter[2]; 1838 if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) { 1839 if (has_subpel_filter) { 1840 const int comp = b->comp_type != COMP_INTER_NONE; 1841 const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0], 1842 by4, bx4); 1843 filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac, 1844 ts->cdf.m.filter[0][ctx1], 1845 DAV1D_N_SWITCHABLE_FILTERS - 1); 1846 if (f->seq_hdr->dual_filter) { 1847 const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1, 1848 b->ref[0], by4, bx4); 1849 if (DEBUG_BLOCK_INFO) 1850 printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n", 1851 filter[0], ctx1, ts->msac.rng); 1852 filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac, 1853 ts->cdf.m.filter[1][ctx2], 1854 DAV1D_N_SWITCHABLE_FILTERS - 1); 1855 if (DEBUG_BLOCK_INFO) 1856 printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n", 1857 filter[1], ctx2, ts->msac.rng); 1858 } else { 1859 filter[1] = filter[0]; 1860 if (DEBUG_BLOCK_INFO) 1861 printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n", 1862 filter[0], ctx1, ts->msac.rng); 1863 } 1864 } else { 1865 filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR; 1866 } 1867 } else { 1868 filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode; 1869 } 1870 b->filter2d = dav1d_filter_2d[filter[1]][filter[0]]; 1871 1872 read_vartx_tree(t, b, bs, bx4, by4); 1873 1874 // reconstruction 1875 if (t->frame_thread.pass == 1) { 1876 f->bd_fn.read_coef_blocks(t, bs, b); 1877 } else { 1878 if (f->bd_fn.recon_b_inter(t, bs, b)) return -1; 1879 } 1880 1881 if (f->frame_hdr->loopfilter.level_y[0] || 1882 f->frame_hdr->loopfilter.level_y[1]) 1883 { 1884 const int is_globalmv = 1885 b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV); 1886 const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2]) 1887 &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv]; 1888 const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 }; 1889 enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx; 1890 if (f->frame_hdr->segmentation.lossless[b->seg_id]) { 1891 ytx = (enum RectTxfmSize) TX_4X4; 1892 uvtx = (enum RectTxfmSize) TX_4X4; 1893 } 1894 dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls, 1895 t->bx, t->by, f->w4, f->h4, b->skip, bs, 1896 ytx, tx_split, uvtx, f->cur.p.layout, 1897 &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], 1898 has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, 1899 has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); 1900 } 1901 1902 // context updates 1903 if (is_comp) 1904 splat_tworef_mv(f->c, t, bs, b, bw4, bh4); 1905 else 1906 splat_oneref_mv(f->c, t, bs, b, bw4, bh4); 1907 BlockContext *edge = t->a; 1908 for (int i = 0, off = bx4; i < 2; i++, off = by4, edge = &t->l) { 1909 #define set_ctx(rep_macro) \ 1910 rep_macro(edge->seg_pred, off, seg_pred); \ 1911 rep_macro(edge->skip_mode, off, b->skip_mode); \ 1912 rep_macro(edge->intra, off, 0); \ 1913 rep_macro(edge->skip, off, b->skip); \ 1914 rep_macro(edge->pal_sz, off, 0); \ 1915 /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \ 1916 rep_macro(t->pal_sz_uv[i], off, 0); \ 1917 rep_macro(edge->tx_intra, off, b_dim[2 + i]); \ 1918 rep_macro(edge->comp_type, off, b->comp_type); \ 1919 rep_macro(edge->filter[0], off, filter[0]); \ 1920 rep_macro(edge->filter[1], off, filter[1]); \ 1921 rep_macro(edge->mode, off, b->inter_mode); \ 1922 rep_macro(edge->ref[0], off, b->ref[0]); \ 1923 rep_macro(edge->ref[1], off, ((uint8_t) b->ref[1])) 1924 case_set(b_dim[2 + i]); 1925 #undef set_ctx 1926 } 1927 if (has_chroma) { 1928 dav1d_memset_pow2[ulog2(cbw4)](&t->a->uvmode[cbx4], DC_PRED); 1929 dav1d_memset_pow2[ulog2(cbh4)](&t->l.uvmode[cby4], DC_PRED); 1930 } 1931 } 1932 1933 // update contexts 1934 if (f->frame_hdr->segmentation.enabled && 1935 f->frame_hdr->segmentation.update_map) 1936 { 1937 uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx]; 1938 #define set_ctx(rep_macro) \ 1939 for (int y = 0; y < bh4; y++) { \ 1940 rep_macro(seg_ptr, 0, b->seg_id); \ 1941 seg_ptr += f->b4_stride; \ 1942 } 1943 case_set(b_dim[2]); 1944 #undef set_ctx 1945 } 1946 if (!b->skip) { 1947 uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1]; 1948 const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15); 1949 const int bx_idx = (bx4 & 16) >> 4; 1950 for (int y = 0; y < bh4; y += 2, noskip_mask++) { 1951 (*noskip_mask)[bx_idx] |= mask; 1952 if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway 1953 (*noskip_mask)[1] |= mask; 1954 } 1955 } 1956 1957 if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) { 1958 const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift; 1959 int (*const lowest_px)[2] = ts->lowest_pixel[sby]; 1960 1961 // keep track of motion vectors for each reference 1962 if (b->comp_type == COMP_INTER_NONE) { 1963 // y 1964 if (imin(bw4, bh4) > 1 && 1965 ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || 1966 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) 1967 { 1968 affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim, 1969 b->motion_mode == MM_WARP ? &t->warpmv : 1970 &f->frame_hdr->gmv[b->ref[0]]); 1971 } else { 1972 mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y, 1973 0, &f->svc[b->ref[0]][1]); 1974 if (b->motion_mode == MM_OBMC) { 1975 obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4); 1976 } 1977 } 1978 1979 // uv 1980 if (has_chroma) { 1981 // sub8x8 derivation 1982 int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver; 1983 refmvs_block *const *r; 1984 if (is_sub8x8) { 1985 assert(ss_hor == 1); 1986 r = &t->rt.r[(t->by & 31) + 5]; 1987 if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0; 1988 if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0; 1989 if (bw4 == 1 && bh4 == ss_ver) 1990 is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0; 1991 } 1992 1993 // chroma prediction 1994 if (is_sub8x8) { 1995 assert(ss_hor == 1); 1996 if (bw4 == 1 && bh4 == ss_ver) { 1997 const refmvs_block *const rr = &r[-1][t->bx - 1]; 1998 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], 1999 t->by - 1, bh4, rr->mv.mv[0].y, ss_ver, 2000 &f->svc[rr->ref.ref[0] - 1][1]); 2001 } 2002 if (bw4 == 1) { 2003 const refmvs_block *const rr = &r[0][t->bx - 1]; 2004 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], 2005 t->by, bh4, rr->mv.mv[0].y, ss_ver, 2006 &f->svc[rr->ref.ref[0] - 1][1]); 2007 } 2008 if (bh4 == ss_ver) { 2009 const refmvs_block *const rr = &r[-1][t->bx]; 2010 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1], 2011 t->by - 1, bh4, rr->mv.mv[0].y, ss_ver, 2012 &f->svc[rr->ref.ref[0] - 1][1]); 2013 } 2014 mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4, 2015 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]); 2016 } else { 2017 if (imin(cbw4, cbh4) > 1 && 2018 ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) || 2019 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION))) 2020 { 2021 affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim, 2022 b->motion_mode == MM_WARP ? &t->warpmv : 2023 &f->frame_hdr->gmv[b->ref[0]]); 2024 } else { 2025 mc_lowest_px(&lowest_px[b->ref[0]][1], 2026 t->by & ~ss_ver, bh4 << (bh4 == ss_ver), 2027 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]); 2028 if (b->motion_mode == MM_OBMC) { 2029 obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4); 2030 } 2031 } 2032 } 2033 } 2034 } else { 2035 // y 2036 for (int i = 0; i < 2; i++) { 2037 if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) { 2038 affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim, 2039 &f->frame_hdr->gmv[b->ref[i]]); 2040 } else { 2041 mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4, 2042 b->mv[i].y, 0, &f->svc[b->ref[i]][1]); 2043 } 2044 } 2045 2046 // uv 2047 if (has_chroma) for (int i = 0; i < 2; i++) { 2048 if (b->inter_mode == GLOBALMV_GLOBALMV && 2049 imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]]) 2050 { 2051 affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim, 2052 &f->frame_hdr->gmv[b->ref[i]]); 2053 } else { 2054 mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4, 2055 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]); 2056 } 2057 } 2058 } 2059 } 2060 2061 return 0; 2062 } 2063 2064 #if __has_feature(memory_sanitizer) 2065 2066 #include <sanitizer/msan_interface.h> 2067 2068 static int checked_decode_b(Dav1dTaskContext *const t, 2069 const enum BlockLevel bl, 2070 const enum BlockSize bs, 2071 const enum BlockPartition bp, 2072 const enum EdgeFlags intra_edge_flags) 2073 { 2074 const Dav1dFrameContext *const f = t->f; 2075 const int err = decode_b(t, bl, bs, bp, intra_edge_flags); 2076 2077 if (err == 0 && !(t->frame_thread.pass & 1)) { 2078 const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 2079 const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 2080 const uint8_t *const b_dim = dav1d_block_dimensions[bs]; 2081 const int bw4 = b_dim[0], bh4 = b_dim[1]; 2082 const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); 2083 const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 && 2084 (bw4 > ss_hor || t->bx & 1) && 2085 (bh4 > ss_ver || t->by & 1); 2086 2087 for (int p = 0; p < 1 + 2 * has_chroma; p++) { 2088 const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 2089 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 2090 const ptrdiff_t stride = f->cur.stride[!!p]; 2091 const int bx = t->bx & ~ss_hor; 2092 const int by = t->by & ~ss_ver; 2093 const int width = w4 << (2 - ss_hor + (bw4 == ss_hor)); 2094 const int height = h4 << (2 - ss_ver + (bh4 == ss_ver)); 2095 2096 const uint8_t *data = f->cur.data[p] + (by << (2 - ss_ver)) * stride + 2097 (bx << (2 - ss_hor + !!f->seq_hdr->hbd)); 2098 2099 for (int y = 0; y < height; data += stride, y++) { 2100 const size_t line_sz = width << !!f->seq_hdr->hbd; 2101 if (__msan_test_shadow(data, line_sz) != -1) { 2102 fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n", 2103 p, bx, by, w4, h4, y); 2104 __msan_check_mem_is_initialized(data, line_sz); 2105 } 2106 } 2107 } 2108 } 2109 2110 return err; 2111 } 2112 2113 #define decode_b checked_decode_b 2114 2115 #endif /* defined(__has_feature) */ 2116 2117 static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl, 2118 const EdgeNode *const node) 2119 { 2120 const Dav1dFrameContext *const f = t->f; 2121 Dav1dTileState *const ts = t->ts; 2122 const int hsz = 16 >> bl; 2123 const int have_h_split = f->bw > t->bx + hsz; 2124 const int have_v_split = f->bh > t->by + hsz; 2125 2126 if (!have_h_split && !have_v_split) { 2127 assert(bl < BL_8X8); 2128 return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)); 2129 } 2130 2131 uint16_t *pc; 2132 enum BlockPartition bp; 2133 int ctx, bx8, by8; 2134 if (t->frame_thread.pass != 2) { 2135 if (0 && bl == BL_64X64) 2136 printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n", 2137 f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng); 2138 bx8 = (t->bx & 31) >> 1; 2139 by8 = (t->by & 31) >> 1; 2140 ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8); 2141 pc = ts->cdf.m.partition[bl][ctx]; 2142 } 2143 2144 if (have_h_split && have_v_split) { 2145 if (t->frame_thread.pass == 2) { 2146 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; 2147 bp = b->bl == bl ? b->bp : PARTITION_SPLIT; 2148 } else { 2149 bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc, 2150 dav1d_partition_type_count[bl]); 2151 if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && 2152 (bp == PARTITION_V || bp == PARTITION_V4 || 2153 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT)) 2154 { 2155 return 1; 2156 } 2157 if (DEBUG_BLOCK_INFO) 2158 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", 2159 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp, 2160 ts->msac.rng); 2161 } 2162 const uint8_t *const b = dav1d_block_sizes[bl][bp]; 2163 2164 switch (bp) { 2165 case PARTITION_NONE: 2166 if (decode_b(t, bl, b[0], PARTITION_NONE, node->o)) 2167 return -1; 2168 break; 2169 case PARTITION_H: 2170 if (decode_b(t, bl, b[0], PARTITION_H, node->h[0])) 2171 return -1; 2172 t->by += hsz; 2173 if (decode_b(t, bl, b[0], PARTITION_H, node->h[1])) 2174 return -1; 2175 t->by -= hsz; 2176 break; 2177 case PARTITION_V: 2178 if (decode_b(t, bl, b[0], PARTITION_V, node->v[0])) 2179 return -1; 2180 t->bx += hsz; 2181 if (decode_b(t, bl, b[0], PARTITION_V, node->v[1])) 2182 return -1; 2183 t->bx -= hsz; 2184 break; 2185 case PARTITION_SPLIT: 2186 if (bl == BL_8X8) { 2187 const EdgeTip *const tip = (const EdgeTip *) node; 2188 assert(hsz == 1); 2189 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL)) 2190 return -1; 2191 const enum Filter2d tl_filter = t->tl_4x4_filter; 2192 t->bx++; 2193 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0])) 2194 return -1; 2195 t->bx--; 2196 t->by++; 2197 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1])) 2198 return -1; 2199 t->bx++; 2200 t->tl_4x4_filter = tl_filter; 2201 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2])) 2202 return -1; 2203 t->bx--; 2204 t->by--; 2205 #if ARCH_X86_64 2206 if (t->frame_thread.pass) { 2207 /* In 8-bit mode with 2-pass decoding the coefficient buffer 2208 * can end up misaligned due to skips here. Work around 2209 * the issue by explicitly realigning the buffer. */ 2210 const int p = t->frame_thread.pass & 1; 2211 ts->frame_thread[p].cf = 2212 (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63); 2213 } 2214 #endif 2215 } else { 2216 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) 2217 return 1; 2218 t->bx += hsz; 2219 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) 2220 return 1; 2221 t->bx -= hsz; 2222 t->by += hsz; 2223 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) 2224 return 1; 2225 t->bx += hsz; 2226 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3))) 2227 return 1; 2228 t->bx -= hsz; 2229 t->by -= hsz; 2230 } 2231 break; 2232 case PARTITION_T_TOP_SPLIT: { 2233 if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL)) 2234 return -1; 2235 t->bx += hsz; 2236 if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1])) 2237 return -1; 2238 t->bx -= hsz; 2239 t->by += hsz; 2240 if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1])) 2241 return -1; 2242 t->by -= hsz; 2243 break; 2244 } 2245 case PARTITION_T_BOTTOM_SPLIT: { 2246 if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0])) 2247 return -1; 2248 t->by += hsz; 2249 if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0])) 2250 return -1; 2251 t->bx += hsz; 2252 if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0)) 2253 return -1; 2254 t->bx -= hsz; 2255 t->by -= hsz; 2256 break; 2257 } 2258 case PARTITION_T_LEFT_SPLIT: { 2259 if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL)) 2260 return -1; 2261 t->by += hsz; 2262 if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1])) 2263 return -1; 2264 t->by -= hsz; 2265 t->bx += hsz; 2266 if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1])) 2267 return -1; 2268 t->bx -= hsz; 2269 break; 2270 } 2271 case PARTITION_T_RIGHT_SPLIT: { 2272 if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0])) 2273 return -1; 2274 t->bx += hsz; 2275 if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0])) 2276 return -1; 2277 t->by += hsz; 2278 if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0)) 2279 return -1; 2280 t->by -= hsz; 2281 t->bx -= hsz; 2282 break; 2283 } 2284 case PARTITION_H4: { 2285 const EdgeBranch *const branch = (const EdgeBranch *) node; 2286 if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0])) 2287 return -1; 2288 t->by += hsz >> 1; 2289 if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4)) 2290 return -1; 2291 t->by += hsz >> 1; 2292 if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM)) 2293 return -1; 2294 t->by += hsz >> 1; 2295 if (t->by < f->bh) 2296 if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1])) 2297 return -1; 2298 t->by -= hsz * 3 >> 1; 2299 break; 2300 } 2301 case PARTITION_V4: { 2302 const EdgeBranch *const branch = (const EdgeBranch *) node; 2303 if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0])) 2304 return -1; 2305 t->bx += hsz >> 1; 2306 if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4)) 2307 return -1; 2308 t->bx += hsz >> 1; 2309 if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT)) 2310 return -1; 2311 t->bx += hsz >> 1; 2312 if (t->bx < f->bw) 2313 if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1])) 2314 return -1; 2315 t->bx -= hsz * 3 >> 1; 2316 break; 2317 } 2318 default: assert(0); 2319 } 2320 } else if (have_h_split) { 2321 unsigned is_split; 2322 if (t->frame_thread.pass == 2) { 2323 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; 2324 is_split = b->bl != bl; 2325 } else { 2326 is_split = dav1d_msac_decode_bool(&ts->msac, 2327 gather_top_partition_prob(pc, bl)); 2328 if (DEBUG_BLOCK_INFO) 2329 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", 2330 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, 2331 is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng); 2332 } 2333 2334 assert(bl < BL_8X8); 2335 if (is_split) { 2336 bp = PARTITION_SPLIT; 2337 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1; 2338 t->bx += hsz; 2339 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1; 2340 t->bx -= hsz; 2341 } else { 2342 bp = PARTITION_H; 2343 if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0], 2344 PARTITION_H, node->h[0])) 2345 return -1; 2346 } 2347 } else { 2348 assert(have_v_split); 2349 unsigned is_split; 2350 if (t->frame_thread.pass == 2) { 2351 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx]; 2352 is_split = b->bl != bl; 2353 } else { 2354 is_split = dav1d_msac_decode_bool(&ts->msac, 2355 gather_left_partition_prob(pc, bl)); 2356 if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split) 2357 return 1; 2358 if (DEBUG_BLOCK_INFO) 2359 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", 2360 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, 2361 is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng); 2362 } 2363 2364 assert(bl < BL_8X8); 2365 if (is_split) { 2366 bp = PARTITION_SPLIT; 2367 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1; 2368 t->by += hsz; 2369 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1; 2370 t->by -= hsz; 2371 } else { 2372 bp = PARTITION_V; 2373 if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0], 2374 PARTITION_V, node->v[0])) 2375 return -1; 2376 } 2377 } 2378 2379 if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) { 2380 #define set_ctx(rep_macro) \ 2381 rep_macro(t->a->partition, bx8, dav1d_al_part_ctx[0][bl][bp]); \ 2382 rep_macro(t->l.partition, by8, dav1d_al_part_ctx[1][bl][bp]) 2383 case_set_upto16(ulog2(hsz)); 2384 #undef set_ctx 2385 } 2386 2387 return 0; 2388 } 2389 2390 static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) { 2391 memset(ctx->intra, keyframe, sizeof(ctx->intra)); 2392 memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode)); 2393 if (keyframe) 2394 memset(ctx->mode, DC_PRED, sizeof(ctx->mode)); 2395 2396 if (pass == 2) return; 2397 2398 memset(ctx->partition, 0, sizeof(ctx->partition)); 2399 memset(ctx->skip, 0, sizeof(ctx->skip)); 2400 memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode)); 2401 memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y)); 2402 memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv)); 2403 memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra)); 2404 memset(ctx->tx, TX_64X64, sizeof(ctx->tx)); 2405 if (!keyframe) { 2406 memset(ctx->ref, -1, sizeof(ctx->ref)); 2407 memset(ctx->comp_type, 0, sizeof(ctx->comp_type)); 2408 memset(ctx->mode, NEARESTMV, sizeof(ctx->mode)); 2409 } 2410 memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef)); 2411 memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef)); 2412 memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter)); 2413 memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred)); 2414 memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz)); 2415 } 2416 2417 // { Y+U+V, Y+U } * 4 2418 static const uint8_t ss_size_mul[4][2] = { 2419 [DAV1D_PIXEL_LAYOUT_I400] = { 4, 4 }, 2420 [DAV1D_PIXEL_LAYOUT_I420] = { 6, 5 }, 2421 [DAV1D_PIXEL_LAYOUT_I422] = { 8, 6 }, 2422 [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 }, 2423 }; 2424 2425 static void setup_tile(Dav1dTileState *const ts, 2426 const Dav1dFrameContext *const f, 2427 const uint8_t *const data, const size_t sz, 2428 const int tile_row, const int tile_col, 2429 const unsigned tile_start_off) 2430 { 2431 const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col]; 2432 const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128; 2433 const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1]; 2434 const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row]; 2435 const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1]; 2436 const int sb_shift = f->sb_shift; 2437 2438 const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout]; 2439 for (int p = 0; p < 2; p++) { 2440 ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ? 2441 &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] : 2442 NULL; 2443 ts->frame_thread[p].cbi = f->frame_thread.cbi ? 2444 &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] : 2445 NULL; 2446 ts->frame_thread[p].cf = f->frame_thread.cf ? 2447 (uint8_t*)f->frame_thread.cf + 2448 (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) : 2449 NULL; 2450 } 2451 2452 dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf); 2453 ts->last_qidx = f->frame_hdr->quant.yac; 2454 ts->last_delta_lf.u32 = 0; 2455 2456 dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update); 2457 2458 ts->tiling.row = tile_row; 2459 ts->tiling.col = tile_col; 2460 ts->tiling.col_start = col_sb_start << sb_shift; 2461 ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw); 2462 ts->tiling.row_start = row_sb_start << sb_shift; 2463 ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh); 2464 2465 // Reference Restoration Unit (used for exp coding) 2466 int sb_idx, unit_idx; 2467 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { 2468 // vertical components only 2469 sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w; 2470 unit_idx = (ts->tiling.row_start & 16) >> 3; 2471 } else { 2472 sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start; 2473 unit_idx = ((ts->tiling.row_start & 16) >> 3) + 2474 ((ts->tiling.col_start & 16) >> 4); 2475 } 2476 for (int p = 0; p < 3; p++) { 2477 if (!((f->lf.restore_planes >> p) & 1U)) 2478 continue; 2479 2480 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { 2481 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 2482 const int d = f->frame_hdr->super_res.width_scale_denominator; 2483 const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p]; 2484 const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3; 2485 const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift; 2486 const int px_x = x << (unit_size_log2 + ss_hor); 2487 const int u_idx = unit_idx + ((px_x & 64) >> 6); 2488 const int sb128x = px_x >> 7; 2489 if (sb128x >= f->sr_sb128w) continue; 2490 ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx]; 2491 } else { 2492 ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; 2493 } 2494 2495 ts->lr_ref[p]->filter_v[0] = 3; 2496 ts->lr_ref[p]->filter_v[1] = -7; 2497 ts->lr_ref[p]->filter_v[2] = 15; 2498 ts->lr_ref[p]->filter_h[0] = 3; 2499 ts->lr_ref[p]->filter_h[1] = -7; 2500 ts->lr_ref[p]->filter_h[2] = 15; 2501 ts->lr_ref[p]->sgr_weights[0] = -32; 2502 ts->lr_ref[p]->sgr_weights[1] = 31; 2503 } 2504 2505 if (f->c->n_tc > 1) { 2506 for (int p = 0; p < 2; p++) 2507 atomic_init(&ts->progress[p], row_sb_start); 2508 } 2509 } 2510 2511 static void read_restoration_info(Dav1dTaskContext *const t, 2512 Av1RestorationUnit *const lr, const int p, 2513 const enum Dav1dRestorationType frame_type) 2514 { 2515 const Dav1dFrameContext *const f = t->f; 2516 Dav1dTileState *const ts = t->ts; 2517 2518 if (frame_type == DAV1D_RESTORATION_SWITCHABLE) { 2519 const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac, 2520 ts->cdf.m.restore_switchable, 2); 2521 lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */ 2522 } else { 2523 const unsigned type = 2524 dav1d_msac_decode_bool_adapt(&ts->msac, 2525 frame_type == DAV1D_RESTORATION_WIENER ? 2526 ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj); 2527 lr->type = type ? frame_type : DAV1D_RESTORATION_NONE; 2528 } 2529 2530 if (lr->type == DAV1D_RESTORATION_WIENER) { 2531 lr->filter_v[0] = p ? 0 : 2532 dav1d_msac_decode_subexp(&ts->msac, 2533 ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5; 2534 lr->filter_v[1] = 2535 dav1d_msac_decode_subexp(&ts->msac, 2536 ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23; 2537 lr->filter_v[2] = 2538 dav1d_msac_decode_subexp(&ts->msac, 2539 ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17; 2540 2541 lr->filter_h[0] = p ? 0 : 2542 dav1d_msac_decode_subexp(&ts->msac, 2543 ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5; 2544 lr->filter_h[1] = 2545 dav1d_msac_decode_subexp(&ts->msac, 2546 ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23; 2547 lr->filter_h[2] = 2548 dav1d_msac_decode_subexp(&ts->msac, 2549 ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17; 2550 memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights)); 2551 ts->lr_ref[p] = lr; 2552 if (DEBUG_BLOCK_INFO) 2553 printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n", 2554 p, lr->filter_v[0], lr->filter_v[1], 2555 lr->filter_v[2], lr->filter_h[0], 2556 lr->filter_h[1], lr->filter_h[2], ts->msac.rng); 2557 } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) { 2558 const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4); 2559 const uint16_t *const sgr_params = dav1d_sgr_params[idx]; 2560 lr->type += idx; 2561 lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac, 2562 ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0; 2563 lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac, 2564 ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95; 2565 memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v)); 2566 memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h)); 2567 ts->lr_ref[p] = lr; 2568 if (DEBUG_BLOCK_INFO) 2569 printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n", 2570 p, idx, lr->sgr_weights[0], 2571 lr->sgr_weights[1], ts->msac.rng); 2572 } 2573 } 2574 2575 // modeled after the equivalent function in aomdec:decodeframe.c 2576 static int check_trailing_bits_after_symbol_coder(const MsacContext *const msac) { 2577 // check marker bit (single 1), followed by zeroes 2578 const int n_bits = -(msac->cnt + 14); 2579 assert(n_bits <= 0); // this assumes we errored out when cnt <= -15 in caller 2580 const int n_bytes = (n_bits + 7) >> 3; 2581 const uint8_t *p = &msac->buf_pos[n_bytes]; 2582 const int pattern = 128 >> ((n_bits - 1) & 7); 2583 if ((p[-1] & (2 * pattern - 1)) != pattern) 2584 return 1; 2585 2586 // check remainder zero bytes 2587 for (; p < msac->buf_end; p++) 2588 if (*p) 2589 return 1; 2590 2591 return 0; 2592 } 2593 2594 int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) { 2595 const Dav1dFrameContext *const f = t->f; 2596 const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64; 2597 Dav1dTileState *const ts = t->ts; 2598 const Dav1dContext *const c = f->c; 2599 const int sb_step = f->sb_step; 2600 const int tile_row = ts->tiling.row, tile_col = ts->tiling.col; 2601 const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col]; 2602 const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128; 2603 2604 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { 2605 dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start, 2606 ts->tiling.col_end, ts->tiling.row_start, 2607 ts->tiling.row_end, t->by >> f->sb_shift, 2608 ts->tiling.row, t->frame_thread.pass); 2609 } 2610 2611 if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) { 2612 const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift; 2613 int (*const lowest_px)[2] = ts->lowest_pixel[sby]; 2614 for (int n = 0; n < 7; n++) 2615 for (int m = 0; m < 2; m++) 2616 lowest_px[n][m] = INT_MIN; 2617 } 2618 2619 reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass); 2620 if (t->frame_thread.pass == 2) { 2621 const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0; 2622 for (t->bx = ts->tiling.col_start, 2623 t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w; 2624 t->bx < ts->tiling.col_end; t->bx += sb_step) 2625 { 2626 if (atomic_load_explicit(c->flush, memory_order_acquire)) 2627 return 1; 2628 if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl])) 2629 return 1; 2630 if (t->bx & 16 || f->seq_hdr->sb128) 2631 t->a++; 2632 } 2633 f->bd_fn.backup_ipred_edge(t); 2634 return 0; 2635 } 2636 2637 if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) { 2638 f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row, 2639 ts->tiling.col_start >> 1, ts->tiling.col_end >> 1, 2640 t->by >> 1, (t->by + sb_step) >> 1); 2641 } 2642 memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv)); 2643 const int sb128y = t->by >> 5; 2644 for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w, 2645 t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start; 2646 t->bx < ts->tiling.col_end; t->bx += sb_step) 2647 { 2648 if (atomic_load_explicit(c->flush, memory_order_acquire)) 2649 return 1; 2650 if (root_bl == BL_128X128) { 2651 t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx; 2652 t->cur_sb_cdef_idx_ptr[0] = -1; 2653 t->cur_sb_cdef_idx_ptr[1] = -1; 2654 t->cur_sb_cdef_idx_ptr[2] = -1; 2655 t->cur_sb_cdef_idx_ptr[3] = -1; 2656 } else { 2657 t->cur_sb_cdef_idx_ptr = 2658 &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) + 2659 ((t->by & 16) >> 3)]; 2660 t->cur_sb_cdef_idx_ptr[0] = -1; 2661 } 2662 // Restoration filter 2663 for (int p = 0; p < 3; p++) { 2664 if (!((f->lf.restore_planes >> p) & 1U)) 2665 continue; 2666 2667 const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 2668 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 2669 const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p]; 2670 const int y = t->by * 4 >> ss_ver; 2671 const int h = (f->cur.p.h + ss_ver) >> ss_ver; 2672 2673 const int unit_size = 1 << unit_size_log2; 2674 const unsigned mask = unit_size - 1; 2675 if (y & mask) continue; 2676 const int half_unit = unit_size >> 1; 2677 // Round half up at frame boundaries, if there's more than one 2678 // restoration unit 2679 if (y && y + half_unit > h) continue; 2680 2681 const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p]; 2682 2683 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { 2684 const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor; 2685 const int n_units = imax(1, (w + half_unit) >> unit_size_log2); 2686 2687 const int d = f->frame_hdr->super_res.width_scale_denominator; 2688 const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3; 2689 const int x0 = ((4 * t->bx * d >> ss_hor) + rnd) >> shift; 2690 const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift; 2691 2692 for (int x = x0; x < imin(x1, n_units); x++) { 2693 const int px_x = x << (unit_size_log2 + ss_hor); 2694 const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7); 2695 const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6); 2696 Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; 2697 2698 read_restoration_info(t, lr, p, frame_type); 2699 } 2700 } else { 2701 const int x = 4 * t->bx >> ss_hor; 2702 if (x & mask) continue; 2703 const int w = (f->cur.p.w + ss_hor) >> ss_hor; 2704 // Round half up at frame boundaries, if there's more than one 2705 // restoration unit 2706 if (x && x + half_unit > w) continue; 2707 const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5); 2708 const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4); 2709 Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx]; 2710 2711 read_restoration_info(t, lr, p, frame_type); 2712 } 2713 } 2714 if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl])) 2715 return 1; 2716 if (t->bx & 16 || f->seq_hdr->sb128) { 2717 t->a++; 2718 t->lf_mask++; 2719 } 2720 } 2721 2722 if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) { 2723 dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt, 2724 ts->tiling.col_start >> 1, ts->tiling.col_end >> 1, 2725 t->by >> 1, (t->by + sb_step) >> 1); 2726 } 2727 2728 // backup pre-loopfilter pixels for intra prediction of the next sbrow 2729 if (t->frame_thread.pass != 1) 2730 f->bd_fn.backup_ipred_edge(t); 2731 2732 // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix" 2733 // up the initial value in neighbour tiles when running the loopfilter 2734 int align_h = (f->bh + 31) & ~31; 2735 memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by], 2736 &t->l.tx_lpf_y[t->by & 16], sb_step); 2737 const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; 2738 align_h >>= ss_ver; 2739 memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)], 2740 &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver); 2741 2742 // error out on symbol decoder overread 2743 if (ts->msac.cnt <= -15) return 1; 2744 2745 return c->strict_std_compliance && 2746 (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] && 2747 check_trailing_bits_after_symbol_coder(&ts->msac); 2748 } 2749 2750 int dav1d_decode_frame_init(Dav1dFrameContext *const f) { 2751 const Dav1dContext *const c = f->c; 2752 int retval = DAV1D_ERR(ENOMEM); 2753 2754 if (f->sbh > f->lf.start_of_tile_row_sz) { 2755 dav1d_free(f->lf.start_of_tile_row); 2756 f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t)); 2757 if (!f->lf.start_of_tile_row) { 2758 f->lf.start_of_tile_row_sz = 0; 2759 goto error; 2760 } 2761 f->lf.start_of_tile_row_sz = f->sbh; 2762 } 2763 int sby = 0; 2764 for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { 2765 f->lf.start_of_tile_row[sby++] = tile_row; 2766 while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1]) 2767 f->lf.start_of_tile_row[sby++] = 0; 2768 } 2769 2770 const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows; 2771 if (n_ts != f->n_ts) { 2772 if (c->n_fc > 1) { 2773 dav1d_free(f->frame_thread.tile_start_off); 2774 f->frame_thread.tile_start_off = 2775 dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts); 2776 if (!f->frame_thread.tile_start_off) { 2777 f->n_ts = 0; 2778 goto error; 2779 } 2780 } 2781 dav1d_free_aligned(f->ts); 2782 f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32); 2783 if (!f->ts) goto error; 2784 f->n_ts = n_ts; 2785 } 2786 2787 const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1)); 2788 if (a_sz != f->a_sz) { 2789 dav1d_free(f->a); 2790 f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz); 2791 if (!f->a) { 2792 f->a_sz = 0; 2793 goto error; 2794 } 2795 f->a_sz = a_sz; 2796 } 2797 2798 const int num_sb128 = f->sb128w * f->sb128h; 2799 const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout]; 2800 const int hbd = !!f->seq_hdr->hbd; 2801 if (c->n_fc > 1) { 2802 const unsigned sb_step4 = f->sb_step * 4; 2803 int tile_idx = 0; 2804 for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { 2805 const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] * 2806 sb_step4 * f->sb128w * 128; 2807 const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] - 2808 f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4; 2809 for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { 2810 f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff * 2811 f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4; 2812 } 2813 } 2814 2815 const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh; 2816 if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) { 2817 dav1d_free(f->tile_thread.lowest_pixel_mem); 2818 f->tile_thread.lowest_pixel_mem = 2819 dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz * 2820 sizeof(*f->tile_thread.lowest_pixel_mem)); 2821 if (!f->tile_thread.lowest_pixel_mem) { 2822 f->tile_thread.lowest_pixel_mem_sz = 0; 2823 goto error; 2824 } 2825 f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz; 2826 } 2827 int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem; 2828 for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows; 2829 tile_row++, tile_row_base += f->frame_hdr->tiling.cols) 2830 { 2831 const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] - 2832 f->frame_hdr->tiling.row_start_sb[tile_row]; 2833 for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { 2834 f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr; 2835 lowest_pixel_ptr += tile_row_sb_h; 2836 } 2837 } 2838 2839 const int cbi_sz = num_sb128 * size_mul[0]; 2840 if (cbi_sz != f->frame_thread.cbi_sz) { 2841 dav1d_free_aligned(f->frame_thread.cbi); 2842 f->frame_thread.cbi = 2843 dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) * 2844 cbi_sz * 32 * 32 / 4, 64); 2845 if (!f->frame_thread.cbi) { 2846 f->frame_thread.cbi_sz = 0; 2847 goto error; 2848 } 2849 f->frame_thread.cbi_sz = cbi_sz; 2850 } 2851 2852 const int cf_sz = (num_sb128 * size_mul[0]) << hbd; 2853 if (cf_sz != f->frame_thread.cf_sz) { 2854 dav1d_free_aligned(f->frame_thread.cf); 2855 f->frame_thread.cf = 2856 dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64); 2857 if (!f->frame_thread.cf) { 2858 f->frame_thread.cf_sz = 0; 2859 goto error; 2860 } 2861 memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2); 2862 f->frame_thread.cf_sz = cf_sz; 2863 } 2864 2865 if (f->frame_hdr->allow_screen_content_tools) { 2866 const int pal_sz = num_sb128 << hbd; 2867 if (pal_sz != f->frame_thread.pal_sz) { 2868 dav1d_free_aligned(f->frame_thread.pal); 2869 f->frame_thread.pal = 2870 dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) * 2871 pal_sz * 16 * 16, 64); 2872 if (!f->frame_thread.pal) { 2873 f->frame_thread.pal_sz = 0; 2874 goto error; 2875 } 2876 f->frame_thread.pal_sz = pal_sz; 2877 } 2878 2879 const int pal_idx_sz = num_sb128 * size_mul[1]; 2880 if (pal_idx_sz != f->frame_thread.pal_idx_sz) { 2881 dav1d_free_aligned(f->frame_thread.pal_idx); 2882 f->frame_thread.pal_idx = 2883 dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) * 2884 pal_idx_sz * 128 * 128 / 8, 64); 2885 if (!f->frame_thread.pal_idx) { 2886 f->frame_thread.pal_idx_sz = 0; 2887 goto error; 2888 } 2889 f->frame_thread.pal_idx_sz = pal_idx_sz; 2890 } 2891 } else if (f->frame_thread.pal) { 2892 dav1d_freep_aligned(&f->frame_thread.pal); 2893 dav1d_freep_aligned(&f->frame_thread.pal_idx); 2894 f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0; 2895 } 2896 } 2897 2898 // update allocation of block contexts for above 2899 ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1]; 2900 const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1]; 2901 const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize; 2902 if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] || 2903 uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] || 2904 need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy || 2905 f->sbh != f->lf.cdef_buf_sbh) 2906 { 2907 dav1d_free_aligned(f->lf.cdef_line_buf); 2908 size_t alloc_sz = 64; 2909 alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy; 2910 alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy; 2911 uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32); 2912 if (!ptr) { 2913 f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0; 2914 goto error; 2915 } 2916 2917 ptr += 32; 2918 if (y_stride < 0) { 2919 f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1); 2920 f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3); 2921 } else { 2922 f->lf.cdef_line[0][0] = ptr + y_stride * 0; 2923 f->lf.cdef_line[1][0] = ptr + y_stride * 2; 2924 } 2925 ptr += llabs(y_stride) * f->sbh * 4; 2926 if (uv_stride < 0) { 2927 f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1); 2928 f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3); 2929 f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5); 2930 f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7); 2931 } else { 2932 f->lf.cdef_line[0][1] = ptr + uv_stride * 0; 2933 f->lf.cdef_line[0][2] = ptr + uv_stride * 2; 2934 f->lf.cdef_line[1][1] = ptr + uv_stride * 4; 2935 f->lf.cdef_line[1][2] = ptr + uv_stride * 6; 2936 } 2937 2938 if (need_cdef_lpf_copy) { 2939 ptr += llabs(uv_stride) * f->sbh * 8; 2940 if (y_stride < 0) 2941 f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1); 2942 else 2943 f->lf.cdef_lpf_line[0] = ptr; 2944 ptr += llabs(y_stride) * f->sbh * 4; 2945 if (uv_stride < 0) { 2946 f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1); 2947 f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1); 2948 } else { 2949 f->lf.cdef_lpf_line[1] = ptr; 2950 f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4; 2951 } 2952 } 2953 2954 f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4; 2955 f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8; 2956 f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy; 2957 f->lf.cdef_buf_sbh = f->sbh; 2958 } 2959 2960 const int sb128 = f->seq_hdr->sb128; 2961 const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12; 2962 y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1]; 2963 if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] || 2964 uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1]) 2965 { 2966 dav1d_free_aligned(f->lf.lr_line_buf); 2967 // lr simd may overread the input, so slightly over-allocate the lpf buffer 2968 size_t alloc_sz = 128; 2969 alloc_sz += (size_t)llabs(y_stride) * num_lines; 2970 alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2; 2971 uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64); 2972 if (!ptr) { 2973 f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0; 2974 goto error; 2975 } 2976 2977 ptr += 64; 2978 if (y_stride < 0) 2979 f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1); 2980 else 2981 f->lf.lr_lpf_line[0] = ptr; 2982 ptr += llabs(y_stride) * num_lines; 2983 if (uv_stride < 0) { 2984 f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1); 2985 f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1); 2986 } else { 2987 f->lf.lr_lpf_line[1] = ptr; 2988 f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines; 2989 } 2990 2991 f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines; 2992 f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2; 2993 } 2994 2995 // update allocation for loopfilter masks 2996 if (num_sb128 != f->lf.mask_sz) { 2997 dav1d_free(f->lf.mask); 2998 dav1d_free(f->lf.level); 2999 f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128); 3000 // over-allocate by 3 bytes since some of the SIMD implementations 3001 // index this from the level type and can thus over-read by up to 3 3002 f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3); 3003 if (!f->lf.mask || !f->lf.level) { 3004 f->lf.mask_sz = 0; 3005 goto error; 3006 } 3007 if (c->n_fc > 1) { 3008 dav1d_free(f->frame_thread.b); 3009 f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) * 3010 num_sb128 * 32 * 32); 3011 if (!f->frame_thread.b) { 3012 f->lf.mask_sz = 0; 3013 goto error; 3014 } 3015 } 3016 f->lf.mask_sz = num_sb128; 3017 } 3018 3019 f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7; 3020 const int lr_mask_sz = f->sr_sb128w * f->sb128h; 3021 if (lr_mask_sz != f->lf.lr_mask_sz) { 3022 dav1d_free(f->lf.lr_mask); 3023 f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz); 3024 if (!f->lf.lr_mask) { 3025 f->lf.lr_mask_sz = 0; 3026 goto error; 3027 } 3028 f->lf.lr_mask_sz = lr_mask_sz; 3029 } 3030 f->lf.restore_planes = 3031 ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) + 3032 ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) + 3033 ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2); 3034 if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) { 3035 dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness); 3036 f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness; 3037 } 3038 dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 }); 3039 memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128); 3040 3041 const int ipred_edge_sz = f->sbh * f->sb128w << hbd; 3042 if (ipred_edge_sz != f->ipred_edge_sz) { 3043 dav1d_free_aligned(f->ipred_edge[0]); 3044 uint8_t *ptr = f->ipred_edge[0] = 3045 dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64); 3046 if (!ptr) { 3047 f->ipred_edge_sz = 0; 3048 goto error; 3049 } 3050 f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1; 3051 f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2; 3052 f->ipred_edge_sz = ipred_edge_sz; 3053 } 3054 3055 const int re_sz = f->sb128h * f->frame_hdr->tiling.cols; 3056 if (re_sz != f->lf.re_sz) { 3057 dav1d_free(f->lf.tx_lpf_right_edge[0]); 3058 f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2); 3059 if (!f->lf.tx_lpf_right_edge[0]) { 3060 f->lf.re_sz = 0; 3061 goto error; 3062 } 3063 f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32; 3064 f->lf.re_sz = re_sz; 3065 } 3066 3067 // init ref mvs 3068 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { 3069 const int ret = 3070 dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr, 3071 f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs, 3072 f->c->n_tc, f->c->n_fc); 3073 if (ret < 0) goto error; 3074 } 3075 3076 // setup dequant tables 3077 init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq); 3078 if (f->frame_hdr->quant.qm) 3079 for (int i = 0; i < N_RECT_TX_SIZES; i++) { 3080 f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i]; 3081 f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i]; 3082 f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i]; 3083 } 3084 else 3085 memset(f->qm, 0, sizeof(f->qm)); 3086 3087 // setup jnt_comp weights 3088 if (f->frame_hdr->switchable_comp_refs) { 3089 for (int i = 0; i < 7; i++) { 3090 const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset; 3091 3092 for (int j = i + 1; j < 7; j++) { 3093 const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset; 3094 3095 const unsigned d1 = 3096 imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc, 3097 f->cur.frame_hdr->frame_offset)), 31); 3098 const unsigned d0 = 3099 imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc, 3100 f->cur.frame_hdr->frame_offset)), 31); 3101 const int order = d0 <= d1; 3102 3103 static const uint8_t quant_dist_weight[3][2] = { 3104 { 2, 3 }, { 2, 5 }, { 2, 7 } 3105 }; 3106 static const uint8_t quant_dist_lookup_table[4][2] = { 3107 { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } 3108 }; 3109 3110 int k; 3111 for (k = 0; k < 3; k++) { 3112 const int c0 = quant_dist_weight[k][order]; 3113 const int c1 = quant_dist_weight[k][!order]; 3114 const int d0_c0 = d0 * c0; 3115 const int d1_c1 = d1 * c1; 3116 if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break; 3117 } 3118 3119 f->jnt_weights[i][j] = quant_dist_lookup_table[k][order]; 3120 } 3121 } 3122 } 3123 3124 /* Init loopfilter pointers. Increasing NULL pointers is technically UB, 3125 * so just point the chroma pointers in 4:0:0 to the luma plane here to 3126 * avoid having additional in-loop branches in various places. We never 3127 * dereference those pointers so it doesn't really matter what they 3128 * point at, as long as the pointers are valid. */ 3129 const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400; 3130 f->lf.p[0] = f->cur.data[0]; 3131 f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0]; 3132 f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0]; 3133 f->lf.sr_p[0] = f->sr_cur.p.data[0]; 3134 f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0]; 3135 f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0]; 3136 3137 retval = 0; 3138 error: 3139 return retval; 3140 } 3141 3142 int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) { 3143 const Dav1dContext *const c = f->c; 3144 int retval = DAV1D_ERR(EINVAL); 3145 3146 if (f->frame_hdr->refresh_context) 3147 dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf); 3148 3149 // parse individual tiles per tile group 3150 int tile_row = 0, tile_col = 0; 3151 f->task_thread.update_set = 0; 3152 for (int i = 0; i < f->n_tile_data; i++) { 3153 const uint8_t *data = f->tile[i].data.data; 3154 size_t size = f->tile[i].data.sz; 3155 3156 for (int j = f->tile[i].start; j <= f->tile[i].end; j++) { 3157 size_t tile_sz; 3158 if (j == f->tile[i].end) { 3159 tile_sz = size; 3160 } else { 3161 if (f->frame_hdr->tiling.n_bytes > size) goto error; 3162 tile_sz = 0; 3163 for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++) 3164 tile_sz |= (unsigned)*data++ << (k * 8); 3165 tile_sz++; 3166 size -= f->frame_hdr->tiling.n_bytes; 3167 if (tile_sz > size) goto error; 3168 } 3169 3170 setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++, 3171 c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0); 3172 3173 if (tile_col == f->frame_hdr->tiling.cols) { 3174 tile_col = 0; 3175 tile_row++; 3176 } 3177 if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context) 3178 f->task_thread.update_set = 1; 3179 data += tile_sz; 3180 size -= tile_sz; 3181 } 3182 } 3183 3184 if (c->n_tc > 1) { 3185 const int uses_2pass = c->n_fc > 1; 3186 for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++) 3187 reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 3188 uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0); 3189 } 3190 3191 retval = 0; 3192 error: 3193 return retval; 3194 } 3195 3196 int dav1d_decode_frame_main(Dav1dFrameContext *const f) { 3197 const Dav1dContext *const c = f->c; 3198 int retval = DAV1D_ERR(EINVAL); 3199 3200 assert(f->c->n_tc == 1); 3201 3202 Dav1dTaskContext *const t = &c->tc[f - c->fc]; 3203 t->f = f; 3204 t->frame_thread.pass = 0; 3205 3206 for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++) 3207 reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0); 3208 3209 // no threading - we explicitly interleave tile/sbrow decoding 3210 // and post-filtering, so that the full process runs in-line 3211 for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) { 3212 const int sbh_end = 3213 imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh); 3214 for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row]; 3215 sby < sbh_end; sby++) 3216 { 3217 t->by = sby << (4 + f->seq_hdr->sb128); 3218 const int by_end = (t->by + f->sb_step) >> 1; 3219 if (f->frame_hdr->use_ref_frame_mvs) { 3220 f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row, 3221 0, f->bw >> 1, t->by >> 1, by_end); 3222 } 3223 for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) { 3224 t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col]; 3225 if (dav1d_decode_tile_sbrow(t)) goto error; 3226 } 3227 if (IS_INTER_OR_SWITCH(f->frame_hdr)) { 3228 dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt, 3229 0, f->bw >> 1, t->by >> 1, by_end); 3230 } 3231 3232 // loopfilter + cdef + restoration 3233 f->bd_fn.filter_sbrow(f, sby); 3234 } 3235 } 3236 3237 retval = 0; 3238 error: 3239 return retval; 3240 } 3241 3242 void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) { 3243 const Dav1dContext *const c = f->c; 3244 3245 if (f->sr_cur.p.data[0]) 3246 atomic_init(&f->task_thread.error, 0); 3247 3248 if (c->n_fc > 1 && retval && f->frame_thread.cf) { 3249 memset(f->frame_thread.cf, 0, 3250 (size_t)f->frame_thread.cf_sz * 128 * 128 / 2); 3251 } 3252 for (int i = 0; i < 7; i++) { 3253 if (f->refp[i].p.frame_hdr) { 3254 if (!retval && c->n_fc > 1 && c->strict_std_compliance && 3255 atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR) 3256 { 3257 retval = DAV1D_ERR(EINVAL); 3258 atomic_store(&f->task_thread.error, 1); 3259 atomic_store(&f->sr_cur.progress[1], FRAME_ERROR); 3260 } 3261 dav1d_thread_picture_unref(&f->refp[i]); 3262 } 3263 dav1d_ref_dec(&f->ref_mvs_ref[i]); 3264 } 3265 3266 dav1d_picture_unref_internal(&f->cur); 3267 dav1d_thread_picture_unref(&f->sr_cur); 3268 dav1d_cdf_thread_unref(&f->in_cdf); 3269 if (f->frame_hdr && f->frame_hdr->refresh_context) { 3270 if (f->out_cdf.progress) 3271 atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR); 3272 dav1d_cdf_thread_unref(&f->out_cdf); 3273 } 3274 dav1d_ref_dec(&f->cur_segmap_ref); 3275 dav1d_ref_dec(&f->prev_segmap_ref); 3276 dav1d_ref_dec(&f->mvs_ref); 3277 dav1d_ref_dec(&f->seq_hdr_ref); 3278 dav1d_ref_dec(&f->frame_hdr_ref); 3279 3280 for (int i = 0; i < f->n_tile_data; i++) 3281 dav1d_data_unref_internal(&f->tile[i].data); 3282 f->task_thread.retval = retval; 3283 } 3284 3285 int dav1d_decode_frame(Dav1dFrameContext *const f) { 3286 assert(f->c->n_fc == 1); 3287 // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task 3288 // threads also. Not sure it makes a measurable difference. 3289 int res = dav1d_decode_frame_init(f); 3290 if (!res) res = dav1d_decode_frame_init_cdf(f); 3291 // wait until all threads have completed 3292 if (!res) { 3293 if (f->c->n_tc > 1) { 3294 res = dav1d_task_create_tile_sbrow(f, 0, 1); 3295 pthread_mutex_lock(&f->task_thread.ttd->lock); 3296 pthread_cond_signal(&f->task_thread.ttd->cond); 3297 if (!res) { 3298 while (!f->task_thread.done[0] || 3299 atomic_load(&f->task_thread.task_counter) > 0) 3300 { 3301 pthread_cond_wait(&f->task_thread.cond, 3302 &f->task_thread.ttd->lock); 3303 } 3304 } 3305 pthread_mutex_unlock(&f->task_thread.ttd->lock); 3306 res = f->task_thread.retval; 3307 } else { 3308 res = dav1d_decode_frame_main(f); 3309 if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) { 3310 dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf, 3311 &f->ts[f->frame_hdr->tiling.update].cdf); 3312 } 3313 } 3314 } 3315 dav1d_decode_frame_exit(f, res); 3316 res = f->task_thread.retval; 3317 f->n_tile_data = 0; 3318 return res; 3319 } 3320 3321 static int get_upscale_x0(const int in_w, const int out_w, const int step) { 3322 const int err = out_w * step - (in_w << 14); 3323 const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2); 3324 return x0 & 0x3fff; 3325 } 3326 3327 int dav1d_submit_frame(Dav1dContext *const c) { 3328 Dav1dFrameContext *f; 3329 int res = -1; 3330 3331 // wait for c->out_delayed[next] and move into c->out if visible 3332 Dav1dThreadPicture *out_delayed; 3333 if (c->n_fc > 1) { 3334 pthread_mutex_lock(&c->task_thread.lock); 3335 const unsigned next = c->frame_thread.next++; 3336 if (c->frame_thread.next == c->n_fc) 3337 c->frame_thread.next = 0; 3338 3339 f = &c->fc[next]; 3340 while (f->n_tile_data > 0) 3341 pthread_cond_wait(&f->task_thread.cond, 3342 &c->task_thread.lock); 3343 out_delayed = &c->frame_thread.out_delayed[next]; 3344 if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) { 3345 unsigned first = atomic_load(&c->task_thread.first); 3346 if (first + 1U < c->n_fc) 3347 atomic_fetch_add(&c->task_thread.first, 1U); 3348 else 3349 atomic_store(&c->task_thread.first, 0); 3350 atomic_compare_exchange_strong(&c->task_thread.reset_task_cur, 3351 &first, UINT_MAX); 3352 if (c->task_thread.cur && c->task_thread.cur < c->n_fc) 3353 c->task_thread.cur--; 3354 } 3355 const int error = f->task_thread.retval; 3356 if (error) { 3357 f->task_thread.retval = 0; 3358 c->cached_error = error; 3359 dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m); 3360 dav1d_thread_picture_unref(out_delayed); 3361 } else if (out_delayed->p.data[0]) { 3362 const unsigned progress = atomic_load_explicit(&out_delayed->progress[1], 3363 memory_order_relaxed); 3364 if ((out_delayed->visible || c->output_invisible_frames) && 3365 progress != FRAME_ERROR) 3366 { 3367 dav1d_thread_picture_ref(&c->out, out_delayed); 3368 c->event_flags |= dav1d_picture_get_event_flags(out_delayed); 3369 } 3370 dav1d_thread_picture_unref(out_delayed); 3371 } 3372 } else { 3373 f = c->fc; 3374 } 3375 3376 f->seq_hdr = c->seq_hdr; 3377 f->seq_hdr_ref = c->seq_hdr_ref; 3378 dav1d_ref_inc(f->seq_hdr_ref); 3379 f->frame_hdr = c->frame_hdr; 3380 f->frame_hdr_ref = c->frame_hdr_ref; 3381 c->frame_hdr = NULL; 3382 c->frame_hdr_ref = NULL; 3383 f->dsp = &c->dsp[f->seq_hdr->hbd]; 3384 3385 const int bpc = 8 + 2 * f->seq_hdr->hbd; 3386 3387 if (!f->dsp->ipred.intra_pred[DC_PRED]) { 3388 Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd]; 3389 3390 switch (bpc) { 3391 #define assign_bitdepth_case(bd) \ 3392 dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \ 3393 dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \ 3394 dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \ 3395 dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \ 3396 dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \ 3397 dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \ 3398 dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \ 3399 break 3400 #if CONFIG_8BPC 3401 case 8: 3402 assign_bitdepth_case(8); 3403 #endif 3404 #if CONFIG_16BPC 3405 case 10: 3406 case 12: 3407 assign_bitdepth_case(16); 3408 #endif 3409 #undef assign_bitdepth_case 3410 default: 3411 dav1d_log(c, "Compiled without support for %d-bit decoding\n", 3412 8 + 2 * f->seq_hdr->hbd); 3413 res = DAV1D_ERR(ENOPROTOOPT); 3414 goto error; 3415 } 3416 } 3417 3418 #define assign_bitdepth_case(bd) \ 3419 f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \ 3420 f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \ 3421 f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \ 3422 f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \ 3423 f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \ 3424 f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \ 3425 f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \ 3426 f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \ 3427 f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \ 3428 f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \ 3429 f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \ 3430 f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \ 3431 f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \ 3432 f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc 3433 if (!f->seq_hdr->hbd) { 3434 #if CONFIG_8BPC 3435 assign_bitdepth_case(8); 3436 #endif 3437 } else { 3438 #if CONFIG_16BPC 3439 assign_bitdepth_case(16); 3440 #endif 3441 } 3442 #undef assign_bitdepth_case 3443 3444 int ref_coded_width[7]; 3445 if (IS_INTER_OR_SWITCH(f->frame_hdr)) { 3446 if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) { 3447 const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame]; 3448 if (!c->refs[pri_ref].p.p.data[0]) { 3449 res = DAV1D_ERR(EINVAL); 3450 goto error; 3451 } 3452 } 3453 for (int i = 0; i < 7; i++) { 3454 const int refidx = f->frame_hdr->refidx[i]; 3455 if (!c->refs[refidx].p.p.data[0] || 3456 f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w || 3457 f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h || 3458 f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 || 3459 f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 || 3460 f->seq_hdr->layout != c->refs[refidx].p.p.p.layout || 3461 bpc != c->refs[refidx].p.p.p.bpc) 3462 { 3463 for (int j = 0; j < i; j++) 3464 dav1d_thread_picture_unref(&f->refp[j]); 3465 res = DAV1D_ERR(EINVAL); 3466 goto error; 3467 } 3468 dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p); 3469 ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0]; 3470 if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w || 3471 f->frame_hdr->height != c->refs[refidx].p.p.p.h) 3472 { 3473 #define scale_fac(ref_sz, this_sz) \ 3474 ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz)) 3475 f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w, 3476 f->frame_hdr->width[0]); 3477 f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h, 3478 f->frame_hdr->height); 3479 f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4; 3480 f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4; 3481 } else { 3482 f->svc[i][0].scale = f->svc[i][1].scale = 0; 3483 } 3484 f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION && 3485 !f->frame_hdr->force_integer_mv && 3486 !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) && 3487 !f->svc[i][0].scale; 3488 } 3489 } 3490 3491 // setup entropy 3492 if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) { 3493 dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac); 3494 } else { 3495 const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame]; 3496 dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]); 3497 } 3498 if (f->frame_hdr->refresh_context) { 3499 res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1); 3500 if (res < 0) goto error; 3501 } 3502 3503 // FIXME qsort so tiles are in order (for frame threading) 3504 if (f->n_tile_data_alloc < c->n_tile_data) { 3505 dav1d_free(f->tile); 3506 assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile)); 3507 f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile)); 3508 if (!f->tile) { 3509 f->n_tile_data_alloc = f->n_tile_data = 0; 3510 res = DAV1D_ERR(ENOMEM); 3511 goto error; 3512 } 3513 f->n_tile_data_alloc = c->n_tile_data; 3514 } 3515 memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile)); 3516 memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile)); 3517 f->n_tile_data = c->n_tile_data; 3518 c->n_tile_data = 0; 3519 3520 // allocate frame 3521 res = dav1d_thread_picture_alloc(c, f, bpc); 3522 if (res < 0) goto error; 3523 3524 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { 3525 res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p); 3526 if (res < 0) goto error; 3527 } else { 3528 dav1d_picture_ref(&f->cur, &f->sr_cur.p); 3529 } 3530 3531 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) { 3532 f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w); 3533 const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; 3534 const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor; 3535 const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor; 3536 f->resize_step[1] = scale_fac(in_cw, out_cw); 3537 #undef scale_fac 3538 f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]); 3539 f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]); 3540 } 3541 3542 // move f->cur into output queue 3543 if (c->n_fc == 1) { 3544 if (f->frame_hdr->show_frame || c->output_invisible_frames) { 3545 dav1d_thread_picture_ref(&c->out, &f->sr_cur); 3546 c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur); 3547 } 3548 } else { 3549 dav1d_thread_picture_ref(out_delayed, &f->sr_cur); 3550 } 3551 3552 f->w4 = (f->frame_hdr->width[0] + 3) >> 2; 3553 f->h4 = (f->frame_hdr->height + 3) >> 2; 3554 f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1; 3555 f->bh = ((f->frame_hdr->height + 7) >> 3) << 1; 3556 f->sb128w = (f->bw + 31) >> 5; 3557 f->sb128h = (f->bh + 31) >> 5; 3558 f->sb_shift = 4 + f->seq_hdr->sb128; 3559 f->sb_step = 16 << f->seq_hdr->sb128; 3560 f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift; 3561 f->b4_stride = (f->bw + 31) & ~31; 3562 f->bitdepth_max = (1 << f->cur.p.bpc) - 1; 3563 atomic_init(&f->task_thread.error, 0); 3564 const int uses_2pass = c->n_fc > 1; 3565 const int cols = f->frame_hdr->tiling.cols; 3566 const int rows = f->frame_hdr->tiling.rows; 3567 atomic_store(&f->task_thread.task_counter, 3568 (cols * rows + f->sbh) << uses_2pass); 3569 3570 // ref_mvs 3571 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) { 3572 f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool, 3573 sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1)); 3574 if (!f->mvs_ref) { 3575 res = DAV1D_ERR(ENOMEM); 3576 goto error; 3577 } 3578 f->mvs = f->mvs_ref->data; 3579 if (!f->frame_hdr->allow_intrabc) { 3580 for (int i = 0; i < 7; i++) 3581 f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset; 3582 } else { 3583 memset(f->refpoc, 0, sizeof(f->refpoc)); 3584 } 3585 if (f->frame_hdr->use_ref_frame_mvs) { 3586 for (int i = 0; i < 7; i++) { 3587 const int refidx = f->frame_hdr->refidx[i]; 3588 const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1; 3589 const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1; 3590 if (c->refs[refidx].refmvs != NULL && 3591 ref_w == f->bw && ref_h == f->bh) 3592 { 3593 f->ref_mvs_ref[i] = c->refs[refidx].refmvs; 3594 dav1d_ref_inc(f->ref_mvs_ref[i]); 3595 f->ref_mvs[i] = c->refs[refidx].refmvs->data; 3596 } else { 3597 f->ref_mvs[i] = NULL; 3598 f->ref_mvs_ref[i] = NULL; 3599 } 3600 memcpy(f->refrefpoc[i], c->refs[refidx].refpoc, 3601 sizeof(*f->refrefpoc)); 3602 } 3603 } else { 3604 memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref)); 3605 } 3606 } else { 3607 f->mvs_ref = NULL; 3608 memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref)); 3609 } 3610 3611 // segmap 3612 if (f->frame_hdr->segmentation.enabled) { 3613 // By default, the previous segmentation map is not initialised. 3614 f->prev_segmap_ref = NULL; 3615 f->prev_segmap = NULL; 3616 3617 // We might need a previous frame's segmentation map. This 3618 // happens if there is either no update or a temporal update. 3619 if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) { 3620 const int pri_ref = f->frame_hdr->primary_ref_frame; 3621 assert(pri_ref != DAV1D_PRIMARY_REF_NONE); 3622 const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1; 3623 const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1; 3624 if (ref_w == f->bw && ref_h == f->bh) { 3625 f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap; 3626 if (f->prev_segmap_ref) { 3627 dav1d_ref_inc(f->prev_segmap_ref); 3628 f->prev_segmap = f->prev_segmap_ref->data; 3629 } 3630 } 3631 } 3632 3633 if (f->frame_hdr->segmentation.update_map) { 3634 // We're updating an existing map, but need somewhere to 3635 // put the new values. Allocate them here (the data 3636 // actually gets set elsewhere) 3637 f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, 3638 sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h); 3639 if (!f->cur_segmap_ref) { 3640 dav1d_ref_dec(&f->prev_segmap_ref); 3641 res = DAV1D_ERR(ENOMEM); 3642 goto error; 3643 } 3644 f->cur_segmap = f->cur_segmap_ref->data; 3645 } else if (f->prev_segmap_ref) { 3646 // We're not updating an existing map, and we have a valid 3647 // reference. Use that. 3648 f->cur_segmap_ref = f->prev_segmap_ref; 3649 dav1d_ref_inc(f->cur_segmap_ref); 3650 f->cur_segmap = f->prev_segmap_ref->data; 3651 } else { 3652 // We need to make a new map. Allocate one here and zero it out. 3653 const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h; 3654 f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size); 3655 if (!f->cur_segmap_ref) { 3656 res = DAV1D_ERR(ENOMEM); 3657 goto error; 3658 } 3659 f->cur_segmap = f->cur_segmap_ref->data; 3660 memset(f->cur_segmap, 0, segmap_size); 3661 } 3662 } else { 3663 f->cur_segmap = NULL; 3664 f->cur_segmap_ref = NULL; 3665 f->prev_segmap_ref = NULL; 3666 } 3667 3668 // update references etc. 3669 const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags; 3670 for (int i = 0; i < 8; i++) { 3671 if (refresh_frame_flags & (1 << i)) { 3672 if (c->refs[i].p.p.frame_hdr) 3673 dav1d_thread_picture_unref(&c->refs[i].p); 3674 dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur); 3675 3676 dav1d_cdf_thread_unref(&c->cdf[i]); 3677 if (f->frame_hdr->refresh_context) { 3678 dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf); 3679 } else { 3680 dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf); 3681 } 3682 3683 dav1d_ref_dec(&c->refs[i].segmap); 3684 c->refs[i].segmap = f->cur_segmap_ref; 3685 if (f->cur_segmap_ref) 3686 dav1d_ref_inc(f->cur_segmap_ref); 3687 dav1d_ref_dec(&c->refs[i].refmvs); 3688 if (!f->frame_hdr->allow_intrabc) { 3689 c->refs[i].refmvs = f->mvs_ref; 3690 if (f->mvs_ref) 3691 dav1d_ref_inc(f->mvs_ref); 3692 } 3693 memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc)); 3694 } 3695 } 3696 3697 if (c->n_fc == 1) { 3698 if ((res = dav1d_decode_frame(f)) < 0) { 3699 dav1d_thread_picture_unref(&c->out); 3700 for (int i = 0; i < 8; i++) { 3701 if (refresh_frame_flags & (1 << i)) { 3702 if (c->refs[i].p.p.frame_hdr) 3703 dav1d_thread_picture_unref(&c->refs[i].p); 3704 dav1d_cdf_thread_unref(&c->cdf[i]); 3705 dav1d_ref_dec(&c->refs[i].segmap); 3706 dav1d_ref_dec(&c->refs[i].refmvs); 3707 } 3708 } 3709 goto error; 3710 } 3711 } else { 3712 dav1d_task_frame_init(f); 3713 pthread_mutex_unlock(&c->task_thread.lock); 3714 } 3715 3716 return 0; 3717 error: 3718 atomic_init(&f->task_thread.error, 1); 3719 dav1d_cdf_thread_unref(&f->in_cdf); 3720 if (f->frame_hdr->refresh_context) 3721 dav1d_cdf_thread_unref(&f->out_cdf); 3722 for (int i = 0; i < 7; i++) { 3723 if (f->refp[i].p.frame_hdr) 3724 dav1d_thread_picture_unref(&f->refp[i]); 3725 dav1d_ref_dec(&f->ref_mvs_ref[i]); 3726 } 3727 if (c->n_fc == 1) 3728 dav1d_thread_picture_unref(&c->out); 3729 else 3730 dav1d_thread_picture_unref(out_delayed); 3731 dav1d_picture_unref_internal(&f->cur); 3732 dav1d_thread_picture_unref(&f->sr_cur); 3733 dav1d_ref_dec(&f->mvs_ref); 3734 dav1d_ref_dec(&f->seq_hdr_ref); 3735 dav1d_ref_dec(&f->frame_hdr_ref); 3736 dav1d_data_props_copy(&c->cached_error_props, &c->in.m); 3737 3738 for (int i = 0; i < f->n_tile_data; i++) 3739 dav1d_data_unref_internal(&f->tile[i].data); 3740 f->n_tile_data = 0; 3741 3742 if (c->n_fc > 1) 3743 pthread_mutex_unlock(&c->task_thread.lock); 3744 3745 return res; 3746 }