tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

looprestoration_inner.c (15203B)


      1 /*
      2 * Copyright © 2023, VideoLAN and dav1d authors
      3 * Copyright © 2023, Loongson Technology Corporation Limited
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #include "src/loongarch/looprestoration.h"
     29 
     30 #if BITDEPTH == 8
     31 
     32 #define REST_UNIT_STRIDE (400)
     33 
     34 void BF(dav1d_wiener_filter_h, lsx)(int32_t *hor_ptr,
     35                                    uint8_t *tmp_ptr,
     36                                    const int16_t filterh[8],
     37                                    const int w, const int h);
     38 
     39 void BF(dav1d_wiener_filter_h, lasx)(int32_t *hor_ptr,
     40                                     uint8_t *tmp_ptr,
     41                                     const int16_t filterh[8],
     42                                     const int w, const int h);
     43 
     44 void BF(dav1d_wiener_filter_v, lsx)(uint8_t *p,
     45                                    const ptrdiff_t p_stride,
     46                                    const int32_t *hor,
     47                                    const int16_t filterv[8],
     48                                    const int w, const int h);
     49 
     50 void BF(dav1d_wiener_filter_v, lasx)(uint8_t *p,
     51                                     const ptrdiff_t p_stride,
     52                                     const int32_t *hor,
     53                                     const int16_t filterv[8],
     54                                     const int w, const int h);
     55 
     56 // This function refers to the function in the ppc/looprestoration_init_tmpl.c.
     57 static inline void padding(uint8_t *dst, const uint8_t *p,
     58                           const ptrdiff_t stride, const uint8_t (*left)[4],
     59                           const uint8_t *lpf, int unit_w, const int stripe_h,
     60                           const enum LrEdgeFlags edges)
     61 {
     62    const int have_left = !!(edges & LR_HAVE_LEFT);
     63    const int have_right = !!(edges & LR_HAVE_RIGHT);
     64 
     65    // Copy more pixels if we don't have to pad them
     66    unit_w += 3 * have_left + 3 * have_right;
     67    uint8_t *dst_l = dst + 3 * !have_left;
     68    p -= 3 * have_left;
     69    lpf -= 3 * have_left;
     70 
     71    if (edges & LR_HAVE_TOP) {
     72        // Copy previous loop filtered rows
     73        const uint8_t *const above_1 = lpf;
     74        const uint8_t *const above_2 = above_1 + PXSTRIDE(stride);
     75        pixel_copy(dst_l, above_1, unit_w);
     76        pixel_copy(dst_l + REST_UNIT_STRIDE, above_1, unit_w);
     77        pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, above_2, unit_w);
     78    } else {
     79        // Pad with first row
     80        pixel_copy(dst_l, p, unit_w);
     81        pixel_copy(dst_l + REST_UNIT_STRIDE, p, unit_w);
     82        pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, p, unit_w);
     83        if (have_left) {
     84            pixel_copy(dst_l, &left[0][1], 3);
     85            pixel_copy(dst_l + REST_UNIT_STRIDE, &left[0][1], 3);
     86            pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, &left[0][1], 3);
     87        }
     88    }
     89 
     90    uint8_t *dst_tl = dst_l + 3 * REST_UNIT_STRIDE;
     91    if (edges & LR_HAVE_BOTTOM) {
     92        // Copy next loop filtered rows
     93        const uint8_t *const below_1 = lpf + 6 * PXSTRIDE(stride);
     94        const uint8_t *const below_2 = below_1 + PXSTRIDE(stride);
     95        pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, below_1, unit_w);
     96        pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, below_2, unit_w);
     97        pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, below_2, unit_w);
     98    } else {
     99        // Pad with last row
    100        const uint8_t *const src = p + (stripe_h - 1) * PXSTRIDE(stride);
    101        pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, src, unit_w);
    102        pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, src, unit_w);
    103        pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, src, unit_w);
    104        if (have_left) {
    105            pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
    106            pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
    107            pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
    108        }
    109    }
    110 
    111    // Inner UNIT_WxSTRIPE_H
    112    for (int j = 0; j < stripe_h; j++) {
    113        pixel_copy(dst_tl + 3 * have_left, p + 3 * have_left, unit_w - 3 * have_left);
    114        dst_tl += REST_UNIT_STRIDE;
    115        p += PXSTRIDE(stride);
    116    }
    117 
    118    if (!have_right) {
    119        uint8_t *pad = dst_l + unit_w;
    120        uint8_t *row_last = &dst_l[unit_w - 1];
    121        // Pad 3x(STRIPE_H+6) with last column
    122        for (int j = 0; j < stripe_h + 6; j++) {
    123            pixel_set(pad, *row_last, 3);
    124            pad += REST_UNIT_STRIDE;
    125            row_last += REST_UNIT_STRIDE;
    126        }
    127    }
    128 
    129    if (!have_left) {
    130        // Pad 3x(STRIPE_H+6) with first column
    131        for (int j = 0; j < stripe_h + 6; j++) {
    132            pixel_set(dst, *dst_l, 3);
    133            dst += REST_UNIT_STRIDE;
    134            dst_l += REST_UNIT_STRIDE;
    135        }
    136    } else {
    137        dst += 3 * REST_UNIT_STRIDE;
    138        for (int j = 0; j < stripe_h; j++) {
    139            pixel_copy(dst, &left[j][1], 3);
    140            dst += REST_UNIT_STRIDE;
    141        }
    142    }
    143 }
    144 
    145 // This function refers to the function in the ppc/looprestoration_init_tmpl.c.
    146 
    147 // FIXME Could split into luma and chroma specific functions,
    148 // (since first and last tops are always 0 for chroma)
    149 // FIXME Could implement a version that requires less temporary memory
    150 // (should be possible to implement with only 6 rows of temp storage)
    151 void dav1d_wiener_filter_lsx(uint8_t *p, const ptrdiff_t p_stride,
    152                              const uint8_t (*const left)[4],
    153                              const uint8_t *lpf,
    154                              const int w, const int h,
    155                              const LooprestorationParams *const params,
    156                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    157 {
    158    const int16_t (*const filter)[8] = params->filter;
    159 
    160    // Wiener filtering is applied to a maximum stripe height of 64 + 3 pixels
    161    // of padding above and below
    162    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    163    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    164    ALIGN_STK_16(int32_t, hor, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE + 64,);
    165 
    166    BF(dav1d_wiener_filter_h, lsx)(hor, tmp, filter[0], w, h + 6);
    167    BF(dav1d_wiener_filter_v, lsx)(p, p_stride, hor, filter[1], w, h);
    168 }
    169 
    170 void dav1d_wiener_filter_lasx(uint8_t *p, const ptrdiff_t p_stride,
    171                              const uint8_t (*const left)[4],
    172                              const uint8_t *lpf,
    173                              const int w, const int h,
    174                              const LooprestorationParams *const params,
    175                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    176 {
    177    const int16_t (*const filter)[8] = params->filter;
    178 
    179    // Wiener filtering is applied to a maximum stripe height of 64 + 3 pixels
    180    // of padding above and below
    181    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    182    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    183    ALIGN_STK_16(int32_t, hor, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE + 64,);
    184 
    185    BF(dav1d_wiener_filter_h, lasx)(hor, tmp, filter[0], w, h + 6);
    186    BF(dav1d_wiener_filter_v, lasx)(p, p_stride, hor, filter[1], w, h);
    187 }
    188 
    189 void BF(dav1d_boxsum3_h, lsx)(int32_t *sumsq, int16_t *sum, pixel *src,
    190                              const int w, const int h);
    191 void BF(dav1d_boxsum3_v, lsx)(int32_t *sumsq, int16_t *sum,
    192                              const int w, const int h);
    193 
    194 void BF(dav1d_boxsum3_sgf_h, lsx)(int32_t *sumsq, int16_t *sum,
    195                                  const int w, const int h, const int w1);
    196 void BF(dav1d_boxsum3_sgf_v, lsx)(int16_t *dst, uint8_t *tmp,
    197                                  int32_t *sumsq, int16_t *sum,
    198                                  const int w, const int h);
    199 void BF(dav1d_sgr_3x3_finish, lsx)(pixel *p, const ptrdiff_t p_stride,
    200                                   int16_t *dst, int w1,
    201                                   const int w, const int h);
    202 
    203 void BF(dav1d_boxsum3_h, lasx)(int32_t *sumsq, int16_t *sum, pixel *src,
    204                               const int w, const int h);
    205 void BF(dav1d_boxsum3_sgf_h, lasx)(int32_t *sumsq, int16_t *sum,
    206                                   const int w, const int h, const int w1);
    207 void BF(dav1d_boxsum3_sgf_v, lasx)(int16_t *dst, uint8_t *tmp,
    208                                   int32_t *sumsq, int16_t *sum,
    209                                   const int w, const int h);
    210 
    211 static inline void boxsum3_lsx(int32_t *sumsq, coef *sum, pixel *src,
    212                               const int w, const int h)
    213 {
    214    BF(dav1d_boxsum3_h, lsx)(sumsq, sum, src, w + 6, h + 6);
    215    BF(dav1d_boxsum3_v, lsx)(sumsq, sum, w + 6, h + 6);
    216 }
    217 
    218 static inline void boxsum3_lasx(int32_t *sumsq, coef *sum, pixel *src,
    219                               const int w, const int h)
    220 {
    221    BF(dav1d_boxsum3_h, lasx)(sumsq, sum, src, w + 6, h + 6);
    222    BF(dav1d_boxsum3_v, lsx)(sumsq, sum, w + 6, h + 6);
    223 }
    224 
    225 void dav1d_sgr_filter_3x3_lsx(pixel *p, const ptrdiff_t p_stride,
    226                              const pixel (*const left)[4],
    227                              const pixel *lpf,
    228                              const int w, const int h,
    229                              const LooprestorationParams *const params,
    230                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    231 {
    232    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    233    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    234    coef dst[64 * 384];
    235 
    236    ALIGN_STK_16(int32_t, sumsq, 68 * REST_UNIT_STRIDE + 8, );
    237    ALIGN_STK_16(int16_t, sum, 68 * REST_UNIT_STRIDE + 16, );
    238 
    239    boxsum3_lsx(sumsq, sum, tmp, w, h);
    240    BF(dav1d_boxsum3_sgf_h, lsx)(sumsq, sum, w, h, params->sgr.s1);
    241    BF(dav1d_boxsum3_sgf_v, lsx)(dst, tmp, sumsq, sum, w, h);
    242    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, dst, params->sgr.w1, w, h);
    243 }
    244 
    245 void dav1d_sgr_filter_3x3_lasx(pixel *p, const ptrdiff_t p_stride,
    246                              const pixel (*const left)[4],
    247                              const pixel *lpf,
    248                              const int w, const int h,
    249                              const LooprestorationParams *const params,
    250                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    251 {
    252    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    253    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    254    coef dst[64 * 384];
    255 
    256    ALIGN_STK_16(int32_t, sumsq, 68 * REST_UNIT_STRIDE + 8, );
    257    ALIGN_STK_16(int16_t, sum, 68 * REST_UNIT_STRIDE + 16, );
    258 
    259    boxsum3_lasx(sumsq, sum, tmp, w, h);
    260    BF(dav1d_boxsum3_sgf_h, lasx)(sumsq, sum, w, h, params->sgr.s1);
    261    BF(dav1d_boxsum3_sgf_v, lasx)(dst, tmp, sumsq, sum, w, h);
    262    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, dst, params->sgr.w1, w, h);
    263 }
    264 
    265 void BF(dav1d_boxsum5_h, lsx)(int32_t *sumsq, int16_t *sum,
    266                              const uint8_t *const src,
    267                              const int w, const int h);
    268 
    269 void BF(dav1d_boxsum5_v, lsx)(int32_t *sumsq, int16_t *sum,
    270                              const int w, const int h);
    271 
    272 void BF(dav1d_boxsum5_sgf_h, lsx)(int32_t *sumsq, int16_t *sum,
    273                                  const int w, const int h,
    274                                  const unsigned s);
    275 
    276 void BF(dav1d_boxsum5_sgf_v, lsx)(int16_t *dst, uint8_t *src,
    277                                  int32_t *sumsq, int16_t *sum,
    278                                  const int w, const int h);
    279 
    280 void BF(dav1d_sgr_mix_finish, lsx)(uint8_t *p, const ptrdiff_t stride,
    281                                   const int16_t *dst0, const int16_t *dst1,
    282                                   const int w0, const int w1,
    283                                   const int w, const int h);
    284 
    285 static inline void boxsum5_lsx(int32_t *sumsq, coef *sum, pixel *src,
    286                               const int w, const int h)
    287 {
    288    BF(dav1d_boxsum5_h, lsx)(sumsq, sum, src, w + 6, h + 6);
    289    BF(dav1d_boxsum5_v, lsx)(sumsq, sum, w + 6, h + 6);
    290 }
    291 
    292 void dav1d_sgr_filter_5x5_lsx(pixel *p, const ptrdiff_t p_stride,
    293                              const pixel (*const left)[4],
    294                              const pixel *lpf,
    295                              const int w, const int h,
    296                              const LooprestorationParams *const params,
    297                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    298 {
    299    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    300    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    301    coef dst[64 * 384];
    302 
    303    ALIGN_STK_16(int32_t, sumsq, 68 * REST_UNIT_STRIDE + 8, );
    304    ALIGN_STK_16(int16_t, sum, 68 * REST_UNIT_STRIDE + 16, );
    305 
    306    boxsum5_lsx(sumsq, sum, tmp, w, h);
    307    BF(dav1d_boxsum5_sgf_h, lsx)(sumsq, sum, w, h, params->sgr.s0);
    308    BF(dav1d_boxsum5_sgf_v, lsx)(dst, tmp, sumsq, sum, w, h);
    309    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, dst, params->sgr.w0, w, h);
    310 }
    311 
    312 void dav1d_sgr_filter_mix_lsx(pixel *p, const ptrdiff_t p_stride,
    313                              const pixel (*const left)[4],
    314                              const pixel *lpf,
    315                              const int w, const int h,
    316                              const LooprestorationParams *const params,
    317                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
    318 {
    319    ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    320    padding(tmp, p, p_stride, left, lpf, w, h, edges);
    321    coef dst0[64 * 384];
    322    coef dst1[64 * 384];
    323 
    324    ALIGN_STK_16(int32_t, sumsq0, 68 * REST_UNIT_STRIDE + 8, );
    325    ALIGN_STK_16(int16_t, sum0, 68 * REST_UNIT_STRIDE + 16, );
    326 
    327    boxsum5_lsx(sumsq0, sum0, tmp, w, h);
    328    BF(dav1d_boxsum5_sgf_h, lsx)(sumsq0, sum0, w, h, params->sgr.s0);
    329    BF(dav1d_boxsum5_sgf_v, lsx)(dst0, tmp, sumsq0, sum0, w, h);
    330 
    331    boxsum3_lsx(sumsq0, sum0, tmp, w, h);
    332    BF(dav1d_boxsum3_sgf_h, lsx)(sumsq0, sum0, w, h, params->sgr.s1);
    333    BF(dav1d_boxsum3_sgf_v, lsx)(dst1, tmp, sumsq0, sum0, w, h);
    334 
    335    BF(dav1d_sgr_mix_finish, lsx)(p, p_stride, dst0, dst1, params->sgr.w0,
    336                                   params->sgr.w1, w, h);
    337 }
    338 #endif