tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

filmgrain.h (9404B)


      1 /*
      2 * Copyright © 2018, Niklas Haas
      3 * Copyright © 2018, VideoLAN and dav1d authors
      4 * Copyright © 2018, Two Orioles, LLC
      5 * Copyright © 2021, Martin Storsjo
      6 * All rights reserved.
      7 *
      8 * Redistribution and use in source and binary forms, with or without
      9 * modification, are permitted provided that the following conditions are met:
     10 *
     11 * 1. Redistributions of source code must retain the above copyright notice, this
     12 *    list of conditions and the following disclaimer.
     13 *
     14 * 2. Redistributions in binary form must reproduce the above copyright notice,
     15 *    this list of conditions and the following disclaimer in the documentation
     16 *    and/or other materials provided with the distribution.
     17 *
     18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 */
     29 
     30 #include "src/cpu.h"
     31 #include "src/filmgrain.h"
     32 #include "asm-offsets.h"
     33 
// Pair each Dav1dFilmGrainData field read by the NEON code with its FGD_*
// constant. CHECK_OFFSET and the FGD_* values are defined outside this file.
// NOTE(review): presumably these are compile-time assertions that the C
// struct layout matches the offsets hard-coded in the assembly — confirm
// against asm-offsets.h.

// Fields paired here: seed, ar_coeff_lag, ar_coeffs_y, ar_coeffs_uv,
// ar_coeff_shift, grain_scale_shift.
CHECK_OFFSET(Dav1dFilmGrainData, seed, FGD_SEED);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeff_lag, FGD_AR_COEFF_LAG);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeffs_y, FGD_AR_COEFFS_Y);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeffs_uv, FGD_AR_COEFFS_UV);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeff_shift, FGD_AR_COEFF_SHIFT);
CHECK_OFFSET(Dav1dFilmGrainData, grain_scale_shift, FGD_GRAIN_SCALE_SHIFT);

// Fields paired here: scaling_shift, uv_mult, uv_luma_mult, uv_offset,
// clip_to_restricted_range.
CHECK_OFFSET(Dav1dFilmGrainData, scaling_shift, FGD_SCALING_SHIFT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_mult, FGD_UV_MULT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_luma_mult, FGD_UV_LUMA_MULT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_offset, FGD_UV_OFFSET);
CHECK_OFFSET(Dav1dFilmGrainData, clip_to_restricted_range, FGD_CLIP_TO_RESTRICTED_RANGE);
     46 
// Prototype of the NEON luma grain generator that film_grain_dsp_init_arm()
// installs as c->generate_grain_y. Only the declaration lives in this file.
// NOTE(review): the definition is presumably hand-written assembly — confirm.
//   buf:  output grain rows, GRAIN_WIDTH entries each
//   data: film grain parameters
// HIGHBD_DECL_SUFFIX is defined elsewhere and may append a further parameter.
void BF(dav1d_generate_grain_y, neon)(entry buf[][GRAIN_WIDTH],
                                     const Dav1dFilmGrainData *const data
                                     HIGHBD_DECL_SUFFIX);
     50 
// GEN_GRAIN_UV(suff) expands to the prototype of the NEON chroma grain
// generator dav1d_generate_grain_uv_<suff>. The macro body has no trailing
// semicolon, so each use below supplies it.
//   buf:   output chroma grain rows
//   buf_y: luma grain rows, passed as read-only input
//   data:  film grain parameters
//   uv:    NOTE(review): presumably selects the chroma plane (0 or 1) — confirm
#define GEN_GRAIN_UV(suff) \
void BF(dav1d_generate_grain_uv_ ## suff, neon)(entry buf[][GRAIN_WIDTH], \
                                               const entry buf_y[][GRAIN_WIDTH], \
                                               const Dav1dFilmGrainData *const data, \
                                               const intptr_t uv \
                                               HIGHBD_DECL_SUFFIX)

// One prototype per suffix; film_grain_dsp_init_arm() installs these for the
// I420, I422 and I444 pixel layouts respectively.
GEN_GRAIN_UV(420);
GEN_GRAIN_UV(422);
GEN_GRAIN_UV(444);
     61 
// Prototype of the NEON kernel that fgy_32x32xn_neon() calls once per block
// column to apply luma grain. Only the declaration lives in this file.
//   dst, src:      output / input pixels at the start of the block column
//   stride:        forwarded from the caller of fgy_32x32xn_neon()
//   scaling:       scaling lookup table (SCALING_SIZE bytes)
//   scaling_shift: data->scaling_shift at the call site
//   grain_lut:     grain rows, GRAIN_WIDTH entries each
//   offsets:       per-column/per-row random offsets built by the caller
//   h:             block height (bh at the call site)
//   clip:          data->clip_to_restricted_range at the call site
//   type:          bit 0 = overlap y, bit 1 = overlap x (see call site)
//
// Use ptrdiff_t instead of int for the last few parameters, to get the
// same layout of parameters on the stack across platforms.
void BF(dav1d_fgy_32x32, neon)(pixel *const dst,
                              const pixel *const src,
                              const ptrdiff_t stride,
                              const uint8_t scaling[SCALING_SIZE],
                              const int scaling_shift,
                              const entry grain_lut[][GRAIN_WIDTH],
                              const int offsets[][2],
                              const int h, const ptrdiff_t clip,
                              const ptrdiff_t type
                              HIGHBD_DECL_SUFFIX);
     74 
     75 static void fgy_32x32xn_neon(pixel *const dst_row, const pixel *const src_row,
     76                             const ptrdiff_t stride,
     77                             const Dav1dFilmGrainData *const data, const size_t pw,
     78                             const uint8_t scaling[SCALING_SIZE],
     79                             const entry grain_lut[][GRAIN_WIDTH],
     80                             const int bh, const int row_num HIGHBD_DECL_SUFFIX)
     81 {
     82    const int rows = 1 + (data->overlap_flag && row_num > 0);
     83 
     84    // seed[0] contains the current row, seed[1] contains the previous
     85    unsigned seed[2];
     86    for (int i = 0; i < rows; i++) {
     87        seed[i] = data->seed;
     88        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
     89        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
     90    }
     91 
     92    int offsets[2 /* col offset */][2 /* row offset */];
     93 
     94    // process this row in FG_BLOCK_SIZE^2 blocks
     95    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
     96 
     97        if (data->overlap_flag && bx) {
     98            // shift previous offsets left
     99            for (int i = 0; i < rows; i++)
    100                offsets[1][i] = offsets[0][i];
    101        }
    102 
    103        // update current offsets
    104        for (int i = 0; i < rows; i++)
    105            offsets[0][i] = get_random_number(8, &seed[i]);
    106 
    107        int type = 0;
    108        if (data->overlap_flag && row_num)
    109            type |= 1; /* overlap y */
    110        if (data->overlap_flag && bx)
    111            type |= 2; /* overlap x */
    112 
    113        BF(dav1d_fgy_32x32, neon)(dst_row + bx, src_row + bx, stride,
    114                                  scaling, data->scaling_shift,
    115                                  grain_lut, offsets, bh,
    116                                  data->clip_to_restricted_range, type
    117                                  HIGHBD_TAIL_SUFFIX);
    118    }
    119 }
    120 
    121 // Use ptrdiff_t instead of int for the last few parameters, to get the
    122 // parameters on the stack with the same layout across platforms.
    123 #define FGUV(nm, sx, sy) \
    124 void BF(dav1d_fguv_32x32_##nm, neon)(pixel *const dst, \
    125                                     const pixel *const src, \
    126                                     const ptrdiff_t stride, \
    127                                     const uint8_t scaling[SCALING_SIZE], \
    128                                     const Dav1dFilmGrainData *const data, \
    129                                     const entry grain_lut[][GRAIN_WIDTH], \
    130                                     const pixel *const luma_row, \
    131                                     const ptrdiff_t luma_stride, \
    132                                     const int offsets[][2], \
    133                                     const ptrdiff_t h, const ptrdiff_t uv, \
    134                                     const ptrdiff_t is_id, \
    135                                     const ptrdiff_t type \
    136                                     HIGHBD_DECL_SUFFIX); \
    137 static void \
    138 fguv_32x32xn_##nm##_neon(pixel *const dst_row, const pixel *const src_row, \
    139                  const ptrdiff_t stride, const Dav1dFilmGrainData *const data, \
    140                  const size_t pw, const uint8_t scaling[SCALING_SIZE], \
    141                  const entry grain_lut[][GRAIN_WIDTH], const int bh, \
    142                  const int row_num, const pixel *const luma_row, \
    143                  const ptrdiff_t luma_stride, const int uv, const int is_id \
    144                  HIGHBD_DECL_SUFFIX) \
    145 { \
    146    const int rows = 1 + (data->overlap_flag && row_num > 0); \
    147 \
    148    /* seed[0] contains the current row, seed[1] contains the previous */ \
    149    unsigned seed[2]; \
    150    for (int i = 0; i < rows; i++) { \
    151        seed[i] = data->seed; \
    152        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8; \
    153        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); \
    154    } \
    155 \
    156    int offsets[2 /* col offset */][2 /* row offset */]; \
    157 \
    158    /* process this row in FG_BLOCK_SIZE^2 blocks (subsampled) */ \
    159    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { \
    160        if (data->overlap_flag && bx) { \
    161            /* shift previous offsets left */ \
    162            for (int i = 0; i < rows; i++) \
    163                offsets[1][i] = offsets[0][i]; \
    164        } \
    165 \
    166        /* update current offsets */ \
    167        for (int i = 0; i < rows; i++) \
    168            offsets[0][i] = get_random_number(8, &seed[i]); \
    169 \
    170        int type = 0; \
    171        if (data->overlap_flag && row_num) \
    172            type |= 1; /* overlap y */ \
    173        if (data->overlap_flag && bx) \
    174            type |= 2; /* overlap x */ \
    175        if (data->chroma_scaling_from_luma) \
    176            type |= 4; \
    177 \
    178        BF(dav1d_fguv_32x32_##nm, neon)(dst_row + bx, src_row + bx, stride, \
    179                                        scaling, data, grain_lut, \
    180                                        luma_row + (bx << sx), luma_stride, \
    181                                        offsets, bh, uv, is_id, type \
    182                                        HIGHBD_TAIL_SUFFIX); \
    183    } \
    184 }
    185 
    186 FGUV(420, 1, 1);
    187 FGUV(422, 1, 0);
    188 FGUV(444, 0, 0);
    189 
    190 static ALWAYS_INLINE void film_grain_dsp_init_arm(Dav1dFilmGrainDSPContext *const c) {
    191    const unsigned flags = dav1d_get_cpu_flags();
    192 
    193    if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
    194 
    195    c->generate_grain_y = BF(dav1d_generate_grain_y, neon);
    196    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, neon);
    197    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, neon);
    198    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, neon);
    199 
    200    c->fgy_32x32xn = fgy_32x32xn_neon;
    201    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_neon;
    202    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_neon;
    203    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_neon;
    204 }