tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lf_apply_tmpl.c (21784B)


      1 /*
      2 * Copyright © 2018, VideoLAN and dav1d authors
      3 * Copyright © 2018, Two Orioles, LLC
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #include "config.h"
     29 
     30 #include <string.h>
     31 
     32 #include "common/intops.h"
     33 
     34 #include "src/lf_apply.h"
     35 #include "src/lr_apply.h"
     36 
     37 // The loop filter buffer stores 12 rows of pixels. A superblock will
     38 // contain at most 2 stripes. Each stripe requires 4 rows of pixels (2 above
     39 // and 2 below); the final 4 rows are used to swap the bottom of the last
     40 // stripe with the top of the next superblock row.
        //
        // backup_lpf() walks the stripes that end at or before row_h and, for each
        // one, stores 4 rows taken around the stripe's bottom boundary into dst.
        //
        //   f          frame context; this function reads frame_hdr (super_res,
        //              width[]), c->n_tc, dsp->mc.resize, resize_step[] and
        //              resize_start[] from it.
        //   dst        backup buffer to write to; dst_stride is its row stride.
        //   src        source rows, positioned at plane row `row`; src_stride is
        //              its row stride.
        //   ss_ver     vertical subsampling shift applied to the stripe height.
        //   sb128      superblock-size flag as passed by the caller
        //              (f->seq_hdr->sb128); used here as a 0/1 shift amount.
        //   row        plane row at which the first stripe starts (local copy is
        //              advanced stripe by stripe).
        //   row_h      stripes are processed while row + stripe_h <= row_h.
        //   src_w      number of pixels per source row.
        //   h          plane height in rows.
        //   ss_hor     horizontal subsampling shift; also indexes resize_step[]
        //              and resize_start[].
        //   lr_backup  nonzero: rows are resized through mc.resize when
        //              width[0] != width[1]; zero: the call is treated as a CDEF
        //              backup (cdef_backup = !lr_backup) and rows are copied as-is.
        //
        // NOTE(review): strides are run through PXSTRIDE(), which is defined
        // elsewhere; presumably it converts a byte stride to pixel units - confirm.
     41 static void backup_lpf(const Dav1dFrameContext *const f,
     42                       pixel *dst, const ptrdiff_t dst_stride,
     43                       const pixel *src, const ptrdiff_t src_stride,
     44                       const int ss_ver, const int sb128,
     45                       int row, const int row_h, const int src_w,
     46                       const int h, const int ss_hor, const int lr_backup)
     47 {
     48    const int cdef_backup = !lr_backup;
           // With super-res enabled, destination rows are width[1] pixels wide
           // (rounded-up subsampled by ss_hor); otherwise they match the source.
     49    const int dst_w = f->frame_hdr->super_res.enabled ?
     50                      (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
     51 
     52    // The first stripe of the frame is shorter by 8 luma pixel rows.
           // A CDEF backup with sb128 set uses 128-row stripes (64 << 1) for the
           // first iteration; every other case starts from 64 rows.
     53    int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
           // Move src to 2 rows above the bottom boundary of the first stripe, so
           // that the 4 saved rows straddle that boundary.
     54    src += (stripe_h - 2) * PXSTRIDE(src_stride);
     55 
           // Only when f->c->n_tc == 1: the first 4 rows of dst hold rows carried
           // over from the previous sb row, and new rows are written after them.
     56    if (f->c->n_tc == 1) {
     57        if (row) {
     58            const int top = 4 << sb128;
     59            // Copy the top part of the stored loop filtered pixels from the
     60            // previous sb row needed above the first stripe of this sb row.
     61            pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
     62                       &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
     63            pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
     64                       &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
     65            pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
     66                       &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
     67            pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
     68                       &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
     69        }
     70        dst += 4 * PXSTRIDE(dst_stride);
     71    }
     72 
     73    if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
               // Resizing path: each saved row is scaled from src_w to dst_w
               // pixels by mc.resize while it is stored.
     74        while (row + stripe_h <= row_h) {
                   // Only 3 source rows exist when the stripe boundary is the
                   // last row of the plane (row + stripe_h == h - 1).
     75            const int n_lines = 4 - (row + stripe_h + 1 == h);
     76            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
     77                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
     78                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
     79            row += stripe_h; // unmodified stripe_h for the 1st stripe
     80            stripe_h = 64 >> ss_ver;
     81            src += stripe_h * PXSTRIDE(src_stride);
     82            dst += n_lines * PXSTRIDE(dst_stride);
                   // Fill the missing 4th row by duplicating the 3rd one so that
                   // every stripe still occupies 4 rows in dst.
     83            if (n_lines == 3) {
     84                pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
     85                dst += PXSTRIDE(dst_stride);
     86            }
     87        }
     88    } else {
               // Plain copy path: 4 rows of src_w pixels per stripe.
     89        while (row + stripe_h <= row_h) {
     90            const int n_lines = 4 - (row + stripe_h + 1 == h);
     91            for (int i = 0; i < 4; i++) {
                       // i == n_lines can only be true for i == 3 with
                       // n_lines == 3: that row is duplicated from the previous
                       // dst row instead of being read from src.
     92                pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
     93                                               src, src_w);
     94                dst += PXSTRIDE(dst_stride);
     95                src += PXSTRIDE(src_stride);
     96            }
     97            row += stripe_h; // unmodified stripe_h for the 1st stripe
     98            stripe_h = 64 >> ss_ver;
                   // src already advanced 4 rows in the loop above; skip the rest
                   // of the next stripe.
     99            src += (stripe_h - 4) * PXSTRIDE(src_stride);
    100        }
    101    }
    102 }
    103 
        // Backs up, for superblock row `sby`, the rows around stripe boundaries of
        // each plane by calling backup_lpf():
        //  - into f->lf.lr_lpf_line[] with lr_backup = 1, when the plane has loop
        //    restoration enabled or the frame is not resized;
        //  - into f->lf.cdef_lpf_line[] with lr_backup = 0, when f->c->n_tc > 1
        //    and the frame is resized (width[0] != width[1]).
        // Nothing is done for a plane unless seq_hdr->cdef is set or the matching
        // LR_RESTORE_* bit is present in f->lf.restore_planes. Chroma is skipped
        // entirely for DAV1D_PIXEL_LAYOUT_I400.
        //
        //   f    frame context (headers, strides, backup line buffers).
        //   src  pointers to the three planes; src[1] and src[2] are only read on
        //        the chroma path. NOTE(review): presumably each points at the
        //        first row of superblock row `sby` - confirm against the caller.
        //   sby  superblock row index.
    104 void bytefn(dav1d_copy_lpf)(Dav1dFrameContext *const f,
    105                            /*const*/ pixel *const src[3], const int sby)
    106 {
    107    const int have_tt = f->c->n_tc > 1;
    108    const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
           // Every sb row except the first starts 8 luma rows early; this matches
           // the first stripe being 8 rows shorter in backup_lpf().
    109    const int offset = 8 * !!sby;
    110    const ptrdiff_t *const src_stride = f->cur.stride;
    111    const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
           // With n_tc > 1 each sb row writes to its own (4 << sb128)-row slot of
           // lr_lpf_line; otherwise the offset is 0.
    112    const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
           // Planes 1 and 2 both use stride index 1.
    113    pixel *const dst[3] = {
    114        f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
    115        f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
    116        f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
    117    };
    118 
    119    // TODO Also check block level restore type to reduce copying.
    120    const int restore_planes = f->lf.restore_planes;
    121 
           // Luma plane.
    122    if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
    123        const int h = f->cur.p.h;
               // NOTE(review): f->bw is presumably in 4-pixel units, making w the
               // width in pixels - confirm.
    124        const int w = f->bw << 2;
               // End of this sb row (64 or 128 rows per sb row), clamped to h - 1.
    125        const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
               // First row handled by this call: start of the sb row minus offset.
    126        const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
    127        if (restore_planes & LR_RESTORE_Y || !resize)
    128            backup_lpf(f, dst[0], lr_stride[0],
    129                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
    130                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
    131        if (have_tt && resize) {
                   // The CDEF backup uses 4 rows per sb row and the source stride.
    132            const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
    133            backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
    134                       src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
    135                       0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
    136        }
    137    }
           // Chroma planes (absent for I400).
    138    if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
    139        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
    140    {
               // Vertical subsampling only for I420; horizontal for everything
               // except I444.
    141        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    142        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
               // Same quantities as for luma, scaled by the subsampling shifts.
    143        const int h = (f->cur.p.h + ss_ver) >> ss_ver;
    144        const int w = f->bw << (2 - ss_hor);
    145        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
    146        const int offset_uv = offset >> ss_ver;
    147        const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
    148        const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
               // U plane.
    149        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
    150            if (restore_planes & LR_RESTORE_U || !resize)
    151                backup_lpf(f, dst[1], lr_stride[1],
    152                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
    153                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
    154                           row_h, w, h, ss_hor, 1);
    155            if (have_tt && resize)
    156                backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
    157                           src[1] - offset_uv * PXSTRIDE(src_stride[1]),
    158                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
    159                           row_h, w, h, ss_hor, 0);
    160        }
               // V plane.
    161        if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
    162            if (restore_planes & LR_RESTORE_V || !resize)
    163                backup_lpf(f, dst[2], lr_stride[1],
    164                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
    165                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
    166                           row_h, w, h, ss_hor, 1);
    167            if (have_tt && resize)
    168                backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
    169                           src[2] - offset_uv * PXSTRIDE(src_stride[1]),
    170                           src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
    171                           row_h, w, h, ss_hor, 0);
    172        }
    173    }
    174 }
    175 
        // Calls the luma column-edge filter, dsp->lf.loop_filter_sb[0][0], once for
        // every column x in [0, w), each column being 4 pixels wide in dst.
        //
        //   f          frame context; supplies dsp and lf.lim_lut.
        //   have_left  when 0, column x == 0 is skipped.
        //   lvl        filter levels, 4 bytes per entry; &lvl[x][0] is passed on.
        //   b4_stride  stride of lvl, forwarded to the DSP function.
        //   mask       per-column edge masks: mask[x][i][half] for i = 0..2, where
        //              half selects one of two 16-bit words.
        //   dst, ls    luma pixels and their stride.
        //   w          number of columns to process.
        //   starty4    first row unit; 0 selects the first mask half (plus the
        //              second when endy4 > 16), nonzero selects the second half.
        //   endy4      end row unit (exclusive); endy4 - starty4 is forwarded as
        //              the last size argument.
    176 static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
    177                                       const int have_left,
    178                                       const uint8_t (*lvl)[4],
    179                                       const ptrdiff_t b4_stride,
    180                                       const uint16_t (*const mask)[3][2],
    181                                       pixel *dst, const ptrdiff_t ls,
    182                                       const int w,
    183                                       const int starty4, const int endy4)
    184 {
    185    const Dav1dDSPContext *const dsp = f->dsp;
    186 
    187    // filter edges between columns (e.g. block1 | block2)
    188    for (int x = 0; x < w; x++) {
    189        if (!have_left && !x) continue;
               // Three 32-bit masks (one per mask index) plus a zero terminator.
    190        uint32_t hmask[4];
    191        if (!starty4) {
    192            hmask[0] = mask[x][0][0];
    193            hmask[1] = mask[x][1][0];
    194            hmask[2] = mask[x][2][0];
                   // More than 16 row units: append the second half as the upper
                   // 16 bits.
    195            if (endy4 > 16) {
    196                hmask[0] |= (unsigned) mask[x][0][1] << 16;
    197                hmask[1] |= (unsigned) mask[x][1][1] << 16;
    198                hmask[2] |= (unsigned) mask[x][2][1] << 16;
    199            }
    200        } else {
                   // Starting in the second half: its bits become bits 0..15.
    201            hmask[0] = mask[x][0][1];
    202            hmask[1] = mask[x][1][1];
    203            hmask[2] = mask[x][2][1];
    204        }
    205        hmask[3] = 0;
    206        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
    207                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
    208                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
    209    }
    210 }
    211 
        // Calls the luma row-edge filter, dsp->lf.loop_filter_sb[0][1], once for
        // every row unit y in [starty4, endy4). dst advances by 4 pixel rows and lvl
        // by b4_stride entries per unit, including units that are skipped.
        //
        //   f          frame context; supplies dsp and lf.lim_lut.
        //   have_top   when 0, row y == 0 is skipped.
        //   lvl        filter levels, 4 bytes per entry; &lvl[0][1] is passed on.
        //   b4_stride  stride of lvl in entries.
        //   mask       per-row edge masks: mask[y][i][half] for i = 0..2; the two
        //              16-bit halves are joined into one 32-bit mask.
        //   dst, ls    luma pixels and their stride.
        //   w          forwarded to the DSP function as the last size argument.
        //   starty4    first row unit (inclusive).
        //   endy4      last row unit (exclusive).
    212 static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
    213                                       const int have_top,
    214                                       const uint8_t (*lvl)[4],
    215                                       const ptrdiff_t b4_stride,
    216                                       const uint16_t (*const mask)[3][2],
    217                                       pixel *dst, const ptrdiff_t ls,
    218                                       const int w,
    219                                       const int starty4, const int endy4)
    220 {
    221    const Dav1dDSPContext *const dsp = f->dsp;
    222 
    223    //                                 block1
    224    // filter edges between rows (e.g. ------)
    225    //                                 block2
    226    for (int y = starty4; y < endy4;
    227         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
    228    {
    229        if (!have_top && !y) continue;
               // Low 16 bits come from half 0, high 16 bits from half 1; the
               // fourth entry is a zero terminator.
    230        const uint32_t vmask[4] = {
    231            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
    232            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
    233            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
    234            0,
    235        };
    236        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
    237                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
    238                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
    239    }
    240 }
    241 
        // Chroma counterpart of filter_plane_cols_y(): for every column x in
        // [0, w) it calls dsp->lf.loop_filter_sb[1][0] twice with the same mask,
        // once on the U plane and once on the V plane.
        //
        //   f          frame context; supplies dsp and lf.lim_lut.
        //   have_left  when 0, column x == 0 is skipped.
        //   lvl        filter levels, 4 bytes per entry; &lvl[x][2] is passed for
        //              U and &lvl[x][3] for V.
        //   b4_stride  stride of lvl, forwarded to the DSP function.
        //   mask       per-column edge masks: mask[x][i][half] for i = 0..1.
        //   u, v       chroma pixels; ls is their common stride.
        //   w          number of columns to process.
        //   starty4    first row unit; 0 selects the first mask half, nonzero
        //              the second.
        //   endy4      end row unit (exclusive).
        //   ss_ver     vertical subsampling shift; each mask half then covers
        //              16 >> ss_ver row units.
    242 static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
    243                                        const int have_left,
    244                                        const uint8_t (*lvl)[4],
    245                                        const ptrdiff_t b4_stride,
    246                                        const uint16_t (*const mask)[2][2],
    247                                        pixel *const u, pixel *const v,
    248                                        const ptrdiff_t ls, const int w,
    249                                        const int starty4, const int endy4,
    250                                        const int ss_ver)
    251 {
    252    const Dav1dDSPContext *const dsp = f->dsp;
    253 
    254    // filter edges between columns (e.g. block1 | block2)
    255    for (int x = 0; x < w; x++) {
    256        if (!have_left && !x) continue;
               // Two masks (one per mask index) plus a zero terminator.
    257        uint32_t hmask[3];
    258        if (!starty4) {
    259            hmask[0] = mask[x][0][0];
    260            hmask[1] = mask[x][1][0];
                   // The range extends past the first half: append the second
                   // half directly above its 16 >> ss_ver bits.
    261            if (endy4 > (16 >> ss_ver)) {
    262                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
    263                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
    264            }
    265        } else {
    266            hmask[0] = mask[x][0][1];
    267            hmask[1] = mask[x][1][1];
    268        }
    269        hmask[2] = 0;
    270        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
    271                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
    272                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
    273        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
    274                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
    275                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
    276    }
    277 }
    278 
        // Chroma counterpart of filter_plane_rows_y(): for every row unit y in
        // [starty4, endy4) it calls dsp->lf.loop_filter_sb[1][1] twice with the
        // same mask, once on the U plane and once on the V plane.
        //
        //   f          frame context; supplies dsp and lf.lim_lut.
        //   have_top   when 0, row y == 0 is skipped.
        //   lvl        filter levels, 4 bytes per entry; &lvl[0][2] is passed for
        //              U and &lvl[0][3] for V. Advances by b4_stride per row unit.
        //   b4_stride  stride of lvl in entries.
        //   mask       per-row edge masks: mask[y][i][half] for i = 0..1.
        //   u, v       chroma pixels; ls is their common stride.
        //   w          forwarded to the DSP function as the last size argument.
        //   starty4    first row unit (inclusive).
        //   endy4      last row unit (exclusive).
        //   ss_hor     horizontal subsampling shift; each mask half then covers
        //              16 >> ss_hor columns.
    279 static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
    280                                        const int have_top,
    281                                        const uint8_t (*lvl)[4],
    282                                        const ptrdiff_t b4_stride,
    283                                        const uint16_t (*const mask)[2][2],
    284                                        pixel *const u, pixel *const v,
    285                                        const ptrdiff_t ls, const int w,
    286                                        const int starty4, const int endy4,
    287                                        const int ss_hor)
    288 {
    289    const Dav1dDSPContext *const dsp = f->dsp;
           // Pixel offset of the current row unit, shared by the U and V pointers.
    290    ptrdiff_t off_l = 0;
    291 
    292    //                                 block1
    293    // filter edges between rows (e.g. ------)
    294    //                                 block2
    295    for (int y = starty4; y < endy4;
    296         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
    297    {
    298        if (!have_top && !y) continue;
               // Second half is placed directly above the 16 >> ss_hor bits of
               // the first; the last entry is a zero terminator.
    299        const uint32_t vmask[3] = {
    300            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
    301            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
    302            0,
    303        };
    304        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
    305                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
    306                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
    307        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
    308                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
    309                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
    310    }
    311 }
    312 
        // Column-edge loop filter pass for superblock row `sby`. In order:
        //  1. at every tile column boundary, caps the filter mask index of the
        //     column-edge masks (filter_y[0] / filter_uv[0]) by the values stored
        //     in f->lf.tx_lpf_right_edge[];
        //  2. if start_of_tile_row is nonzero, does the same for the row-edge
        //     masks (filter_y[1] / filter_uv[1]) of the first row of this sb row,
        //     using tx_lpf_y[] / tx_lpf_uv[] from f->a;
        //  3. runs filter_plane_cols_y() over every 128-pixel-wide unit, then
        //     filter_plane_cols_uv() unless both chroma filter levels are 0.
        //
        //   f                  frame context.
        //   p                  plane pointers for this sb row (p[0] luma, p[1] and
        //                      p[2] chroma).
        //   lflvl              array of per-unit filter masks, indexed by the
        //                      128-pixel column unit (x < f->sb128w).
        //   sby                superblock row index.
        //   start_of_tile_row  0 to skip step 2; otherwise (value - 1) selects the
        //                      row of f->a (in units of f->sb128w entries) to read.
    313 void bytefn(dav1d_loopfilter_sbrow_cols)(const Dav1dFrameContext *const f,
    314                                         pixel *const p[3], Av1Filter *const lflvl,
    315                                         int sby, const int start_of_tile_row)
    316 {
    317    int x, have_left;
    318    // Don't filter outside the frame
    319    const int is_sb64 = !f->seq_hdr->sb128;
           // With 64-pixel superblocks, odd sb rows use the second half of the
           // mask arrays and therefore start at row unit 16; otherwise 0.
    320    const int starty4 = (sby & is_sb64) << 4;
           // Superblock size in the same units as f->h4 (16 or 32) and its log2.
    321    const int sbsz = 32 >> is_sb64;
    322    const int sbl2 = 5 - is_sb64;
           // f->bh rounded up to a multiple of 32; used as the per-tile-column
           // step through tx_lpf_right_edge[].
    323    const int halign = (f->bh + 31) & ~31;
    324    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    325    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
           // Number of bits per chroma mask half, and the first bit value that
           // lies beyond the first half.
    326    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
    327    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
           // Clamp the end of the sb row to the bottom of the frame (f->h4).
    328    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
    329    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
    330 
    331    // fix lpf strength at tile col boundaries
    332    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
    333    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
           // Starts at tile_col 1 (col_start_sb[0] is not visited) and stops at
           // the first start position that is at or beyond f->bw.
    334    for (int tile_col = 1;; tile_col++) {
    335        x = f->frame_hdr->tiling.col_start_sb[tile_col];
    336        if ((x << sbl2) >= f->bw) break;
               // Column inside the 128-pixel unit: 16 when this is an odd
               // 64-pixel superblock, else 0. x then becomes the lflvl index.
    337        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
    338        x >>= is_sb64;
    339 
    340        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
               // mask walks one bit per row unit; sidx picks the 16-bit half and
               // smask is the bit position inside that half.
    341        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
    342            const int sidx = mask >= 0x10000U;
    343            const unsigned smask = mask >> (sidx << 4);
                   // Mask index currently holding this bit: 2 or 1 when set in
                   // y_hmask[2] or y_hmask[1] (assuming at most one is set),
                   // else 0.
    344            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
    345                                !!(y_hmask[1][sidx] & smask);
                   // Clear the bit everywhere, then set it at the index capped by
                   // the stored right-edge value.
    346            y_hmask[2][sidx] &= ~smask;
    347            y_hmask[1][sidx] &= ~smask;
    348            y_hmask[0][sidx] &= ~smask;
    349            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
    350        }
    351 
    352        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
                   // Same fix for chroma, which has only two mask indices.
    353            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
    354            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
    355                 y++, uv_mask <<= 1)
    356            {
    357                const int sidx = uv_mask >= vmax;
    358                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
    359                const int idx = !!(uv_hmask[1][sidx] & smask);
    360                uv_hmask[1][sidx] &= ~smask;
    361                uv_hmask[0][sidx] &= ~smask;
    362                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
    363            }
    364        }
               // Advance to the right-edge data of the next tile column.
    365        lpf_y  += halign;
    366        lpf_uv += halign >> ss_ver;
    367    }
    368 
    369    // fix lpf strength at tile row boundaries
    370    if (start_of_tile_row) {
    371        const BlockContext *a;
               // One BlockContext per 128-pixel unit, taken from row
               // (start_of_tile_row - 1) of f->a.
    372        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
    373             x < f->sb128w; x++, a++)
    374        {
                   // Row-edge masks of the first row unit of this sb row.
    375            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
                   // Columns of this unit that lie inside the frame (at most 32).
    376            const unsigned w = imin(32, f->w4 - (x << 5));
    377            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
    378                const int sidx = mask >= 0x10000U;
    379                const unsigned smask = mask >> (sidx << 4);
    380                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
    381                                    !!(y_vmask[1][sidx] & smask);
    382                y_vmask[2][sidx] &= ~smask;
    383                y_vmask[1][sidx] &= ~smask;
    384                y_vmask[0][sidx] &= ~smask;
    385                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
    386            }
    387 
    388            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
    389                const unsigned cw = (w + ss_hor) >> ss_hor;
    390                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
    391                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
    392                    const int sidx = uv_mask >= hmax;
    393                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
    394                    const int idx = !!(uv_vmask[1][sidx] & smask);
    395                    uv_vmask[1][sidx] &= ~smask;
    396                    uv_vmask[0][sidx] &= ~smask;
    397                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
    398                }
    399            }
    400        }
    401    }
    402 
           // Luma: one call per 128-pixel unit (32 level entries); have_left is 0
           // only for the first unit so that the leftmost column is not filtered.
    403    pixel *ptr;
    404    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
    405    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
    406         x++, have_left = 1, ptr += 128, level_ptr += 32)
    407    {
    408        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
    409                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
    410                            imin(32, f->w4 - x * 32), starty4, endy4);
    411    }
    412 
           // Chroma filtering is skipped when both chroma filter levels are 0.
    413    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
    414        return;
    415 
           // Chroma: same walk with the steps scaled by the subsampling shifts.
    416    ptrdiff_t uv_off;
    417    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
    418    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
    419         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
    420    {
    421        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
    422                             lflvl[x].filter_uv[0],
    423                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
    424                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
    425                             starty4 >> ss_ver, uv_endy4, ss_ver);
    426    }
    427 }
    428 
        // Row-edge loop filter pass for superblock row `sby`: runs
        // filter_plane_rows_y() over every 128-pixel-wide unit, then
        // filter_plane_rows_uv() unless both chroma filter levels are 0.
        //
        //   f      frame context.
        //   p      plane pointers for this sb row (p[0] luma, p[1] and p[2]
        //          chroma).
        //   lflvl  array of per-unit filter masks, indexed by the 128-pixel column
        //          unit (x < f->sb128w); the filter_y[1] / filter_uv[1] masks are
        //          used here.
        //   sby    superblock row index; the top edge is skipped for sby == 0.
    429 void bytefn(dav1d_loopfilter_sbrow_rows)(const Dav1dFrameContext *const f,
    430                                         pixel *const p[3], Av1Filter *const lflvl,
    431                                         int sby)
    432 {
    433    int x;
    434    // Don't filter outside the frame
    435    const int have_top = sby > 0;
    436    const int is_sb64 = !f->seq_hdr->sb128;
           // With 64-pixel superblocks, odd sb rows start at row unit 16 of the
           // mask arrays; otherwise at 0.
    437    const int starty4 = (sby & is_sb64) << 4;
           // Superblock size in the same units as f->h4 (16 or 32).
    438    const int sbsz = 32 >> is_sb64;
    439    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    440    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
           // Clamp the end of the sb row to the bottom of the frame (f->h4).
    441    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
    442    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
    443 
           // Luma: one call per 128-pixel unit (32 level entries).
    444    pixel *ptr;
    445    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
    446    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
    447        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
    448                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
    449                            imin(32, f->w4 - x * 32), starty4, endy4);
    450    }
    451 
           // Chroma filtering is skipped when both chroma filter levels are 0.
    452    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
    453        return;
    454 
           // Chroma: same walk with the steps scaled by the subsampling shifts.
    455    ptrdiff_t uv_off;
    456    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
    457    for (uv_off = 0, x = 0; x < f->sb128w;
    458         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
    459    {
    460        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
    461                             lflvl[x].filter_uv[1],
    462                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
    463                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
    464                             starty4 >> ss_ver, uv_endy4, ss_hor);
    465    }
    466 }