tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

filmgrain_tmpl.c (18884B)


      1 /*
      2 * Copyright © 2018, Niklas Haas
      3 * Copyright © 2018, VideoLAN and dav1d authors
      4 * Copyright © 2018, Two Orioles, LLC
      5 * All rights reserved.
      6 *
      7 * Redistribution and use in source and binary forms, with or without
      8 * modification, are permitted provided that the following conditions are met:
      9 *
     10 * 1. Redistributions of source code must retain the above copyright notice, this
     11 *    list of conditions and the following disclaimer.
     12 *
     13 * 2. Redistributions in binary form must reproduce the above copyright notice,
     14 *    this list of conditions and the following disclaimer in the documentation
     15 *    and/or other materials provided with the distribution.
     16 *
     17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 */
     28 
     29 #include "common/attributes.h"
     30 #include "common/intops.h"
     31 
     32 #include "src/filmgrain.h"
     33 #include "src/tables.h"
     34 
// Dimensions of the chroma grain template along an axis that is subsampled;
// generate_grain_uv_c() falls back to GRAIN_WIDTH / GRAIN_HEIGHT for an axis
// that is not subsampled.
#define SUB_GRAIN_WIDTH 44
#define SUB_GRAIN_HEIGHT 38
     37 
     38 static inline int get_random_number(const int bits, unsigned *const state) {
     39    const int r = *state;
     40    unsigned bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1;
     41    *state = (r >> 1) | (bit << 15);
     42 
     43    return (*state >> (16 - bits)) & ((1 << bits) - 1);
     44 }
     45 
     46 static inline int round2(const int x, const uint64_t shift) {
     47    return (x + ((1 << shift) >> 1)) >> shift;
     48 }
     49 
     50 static void generate_grain_y_c(entry buf[][GRAIN_WIDTH],
     51                               const Dav1dFilmGrainData *const data
     52                               HIGHBD_DECL_SUFFIX)
     53 {
     54    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
     55    unsigned seed = data->seed;
     56    const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
     57    const int grain_ctr = 128 << bitdepth_min_8;
     58    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
     59 
     60    for (int y = 0; y < GRAIN_HEIGHT; y++) {
     61        for (int x = 0; x < GRAIN_WIDTH; x++) {
     62            const int value = get_random_number(11, &seed);
     63            buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
     64        }
     65    }
     66 
     67    const int ar_pad = 3;
     68    const int ar_lag = data->ar_coeff_lag;
     69 
     70    for (int y = ar_pad; y < GRAIN_HEIGHT; y++) {
     71        for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) {
     72            const int8_t *coeff = data->ar_coeffs_y;
     73            int sum = 0;
     74            for (int dy = -ar_lag; dy <= 0; dy++) {
     75                for (int dx = -ar_lag; dx <= ar_lag; dx++) {
     76                    if (!dx && !dy)
     77                        break;
     78                    sum += *(coeff++) * buf[y + dy][x + dx];
     79                }
     80            }
     81 
     82            const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
     83            buf[y][x] = iclip(grain, grain_min, grain_max);
     84        }
     85    }
     86 }
     87 
     88 static NOINLINE void
     89 generate_grain_uv_c(entry buf[][GRAIN_WIDTH],
     90                    const entry buf_y[][GRAIN_WIDTH],
     91                    const Dav1dFilmGrainData *const data, const intptr_t uv,
     92                    const int subx, const int suby HIGHBD_DECL_SUFFIX)
     93 {
     94    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
     95    unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
     96    const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
     97    const int grain_ctr = 128 << bitdepth_min_8;
     98    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
     99 
    100    const int chromaW = subx ? SUB_GRAIN_WIDTH  : GRAIN_WIDTH;
    101    const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
    102 
    103    for (int y = 0; y < chromaH; y++) {
    104        for (int x = 0; x < chromaW; x++) {
    105            const int value = get_random_number(11, &seed);
    106            buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
    107        }
    108    }
    109 
    110    const int ar_pad = 3;
    111    const int ar_lag = data->ar_coeff_lag;
    112 
    113    for (int y = ar_pad; y < chromaH; y++) {
    114        for (int x = ar_pad; x < chromaW - ar_pad; x++) {
    115            const int8_t *coeff = data->ar_coeffs_uv[uv];
    116            int sum = 0;
    117            for (int dy = -ar_lag; dy <= 0; dy++) {
    118                for (int dx = -ar_lag; dx <= ar_lag; dx++) {
    119                    // For the final (current) pixel, we need to add in the
    120                    // contribution from the luma grain texture
    121                    if (!dx && !dy) {
    122                        if (!data->num_y_points)
    123                            break;
    124                        int luma = 0;
    125                        const int lumaX = ((x - ar_pad) << subx) + ar_pad;
    126                        const int lumaY = ((y - ar_pad) << suby) + ar_pad;
    127                        for (int i = 0; i <= suby; i++) {
    128                            for (int j = 0; j <= subx; j++) {
    129                                luma += buf_y[lumaY + i][lumaX + j];
    130                            }
    131                        }
    132                        luma = round2(luma, subx + suby);
    133                        sum += luma * (*coeff);
    134                        break;
    135                    }
    136 
    137                    sum += *(coeff++) * buf[y + dy][x + dx];
    138                }
    139            }
    140 
    141            const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
    142            buf[y][x] = iclip(grain, grain_min, grain_max);
    143        }
    144    }
    145 }
    146 
    147 #define gnuv_ss_fn(nm, ss_x, ss_y) \
    148 static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \
    149    generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \
    150 }
    151 
    152 gnuv_ss_fn(420, 1, 1);
    153 gnuv_ss_fn(422, 1, 0);
    154 gnuv_ss_fn(444, 0, 0);
    155 
    156 // samples from the correct block of a grain LUT, while taking into account the
    157 // offsets provided by the offsets cache
    158 static inline entry sample_lut(const entry grain_lut[][GRAIN_WIDTH],
    159                               const int offsets[2][2], const int subx, const int suby,
    160                               const int bx, const int by, const int x, const int y)
    161 {
    162    const int randval = offsets[bx][by];
    163    const int offx = 3 + (2 >> subx) * (3 + (randval >> 4));
    164    const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF));
    165    return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by]
    166                    [offx + x + (FG_BLOCK_SIZE >> subx) * bx];
    167 }
    168 
    169 static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
    170                          const ptrdiff_t stride,
    171                          const Dav1dFilmGrainData *const data, const size_t pw,
    172                          const uint8_t scaling[SCALING_SIZE],
    173                          const entry grain_lut[][GRAIN_WIDTH],
    174                          const int bh, const int row_num HIGHBD_DECL_SUFFIX)
    175 {
    176    const int rows = 1 + (data->overlap_flag && row_num > 0);
    177    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    178    const int grain_ctr = 128 << bitdepth_min_8;
    179    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
    180 
    181    int min_value, max_value;
    182    if (data->clip_to_restricted_range) {
    183        min_value = 16 << bitdepth_min_8;
    184        max_value = 235 << bitdepth_min_8;
    185    } else {
    186        min_value = 0;
    187        max_value = BITDEPTH_MAX;
    188    }
    189 
    190    // seed[0] contains the current row, seed[1] contains the previous
    191    unsigned seed[2];
    192    for (int i = 0; i < rows; i++) {
    193        seed[i] = data->seed;
    194        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
    195        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
    196    }
    197 
    198    assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);
    199 
    200    int offsets[2 /* col offset */][2 /* row offset */];
    201 
    202    // process this row in FG_BLOCK_SIZE^2 blocks
    203    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
    204        const int bw = imin(FG_BLOCK_SIZE, (int) pw - bx);
    205 
    206        if (data->overlap_flag && bx) {
    207            // shift previous offsets left
    208            for (int i = 0; i < rows; i++)
    209                offsets[1][i] = offsets[0][i];
    210        }
    211 
    212        // update current offsets
    213        for (int i = 0; i < rows; i++)
    214            offsets[0][i] = get_random_number(8, &seed[i]);
    215 
    216        // x/y block offsets to compensate for overlapped regions
    217        const int ystart = data->overlap_flag && row_num ? imin(2, bh) : 0;
    218        const int xstart = data->overlap_flag && bx      ? imin(2, bw) : 0;
    219 
    220        static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
    221 
    222 #define add_noise_y(x, y, grain)                                                  \
    223        const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx;     \
    224        pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx;           \
    225        const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
    226        *dst = iclip(*src + noise, min_value, max_value);
    227 
    228        for (int y = ystart; y < bh; y++) {
    229            // Non-overlapped image region (straightforward)
    230            for (int x = xstart; x < bw; x++) {
    231                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
    232                add_noise_y(x, y, grain);
    233            }
    234 
    235            // Special case for overlapped column
    236            for (int x = 0; x < xstart; x++) {
    237                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
    238                int old   = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
    239                grain = round2(old * w[x][0] + grain * w[x][1], 5);
    240                grain = iclip(grain, grain_min, grain_max);
    241                add_noise_y(x, y, grain);
    242            }
    243        }
    244 
    245        for (int y = 0; y < ystart; y++) {
    246            // Special case for overlapped row (sans corner)
    247            for (int x = xstart; x < bw; x++) {
    248                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
    249                int old   = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
    250                grain = round2(old * w[y][0] + grain * w[y][1], 5);
    251                grain = iclip(grain, grain_min, grain_max);
    252                add_noise_y(x, y, grain);
    253            }
    254 
    255            // Special case for doubly-overlapped corner
    256            for (int x = 0; x < xstart; x++) {
    257                // Blend the top pixel with the top left block
    258                int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
    259                int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
    260                top = round2(old * w[x][0] + top * w[x][1], 5);
    261                top = iclip(top, grain_min, grain_max);
    262 
    263                // Blend the current pixel with the left block
    264                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
    265                old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
    266                grain = round2(old * w[x][0] + grain * w[x][1], 5);
    267                grain = iclip(grain, grain_min, grain_max);
    268 
    269                // Mix the row rows together and apply grain
    270                grain = round2(top * w[y][0] + grain * w[y][1], 5);
    271                grain = iclip(grain, grain_min, grain_max);
    272                add_noise_y(x, y, grain);
    273            }
    274        }
    275    }
    276 }
    277 
    278 static NOINLINE void
    279 fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
    280               const ptrdiff_t stride, const Dav1dFilmGrainData *const data,
    281               const size_t pw, const uint8_t scaling[SCALING_SIZE],
    282               const entry grain_lut[][GRAIN_WIDTH], const int bh,
    283               const int row_num, const pixel *const luma_row,
    284               const ptrdiff_t luma_stride, const int uv, const int is_id,
    285               const int sx, const int sy HIGHBD_DECL_SUFFIX)
    286 {
    287    const int rows = 1 + (data->overlap_flag && row_num > 0);
    288    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    289    const int grain_ctr = 128 << bitdepth_min_8;
    290    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
    291 
    292    int min_value, max_value;
    293    if (data->clip_to_restricted_range) {
    294        min_value = 16 << bitdepth_min_8;
    295        max_value = (is_id ? 235 : 240) << bitdepth_min_8;
    296    } else {
    297        min_value = 0;
    298        max_value = BITDEPTH_MAX;
    299    }
    300 
    301    // seed[0] contains the current row, seed[1] contains the previous
    302    unsigned seed[2];
    303    for (int i = 0; i < rows; i++) {
    304        seed[i] = data->seed;
    305        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
    306        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
    307    }
    308 
    309    assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);
    310 
    311    int offsets[2 /* col offset */][2 /* row offset */];
    312 
    313    // process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
    314    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) {
    315        const int bw = imin(FG_BLOCK_SIZE >> sx, (int)(pw - bx));
    316        if (data->overlap_flag && bx) {
    317            // shift previous offsets left
    318            for (int i = 0; i < rows; i++)
    319                offsets[1][i] = offsets[0][i];
    320        }
    321 
    322        // update current offsets
    323        for (int i = 0; i < rows; i++)
    324            offsets[0][i] = get_random_number(8, &seed[i]);
    325 
    326        // x/y block offsets to compensate for overlapped regions
    327        const int ystart = data->overlap_flag && row_num ? imin(2 >> sy, bh) : 0;
    328        const int xstart = data->overlap_flag && bx      ? imin(2 >> sx, bw) : 0;
    329 
    330        static const int w[2 /* sub */][2 /* off */][2] = {
    331            { { 27, 17 }, { 17, 27 } },
    332            { { 23, 22 } },
    333        };
    334 
    335 #define add_noise_uv(x, y, grain)                                                    \
    336            const int lx = (bx + x) << sx;                                           \
    337            const int ly = y << sy;                                                  \
    338            const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx;    \
    339            pixel avg = luma[0];                                                     \
    340            if (sx)                                                                  \
    341                avg = (avg + luma[1] + 1) >> 1;                                      \
    342            const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x));  \
    343            pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x));        \
    344            int val = avg;                                                           \
    345            if (!data->chroma_scaling_from_luma) {                                   \
    346                const int combined = avg * data->uv_luma_mult[uv] +                  \
    347                               *src * data->uv_mult[uv];                             \
    348                val = iclip_pixel( (combined >> 6) +                                 \
    349                                   (data->uv_offset[uv] * (1 << bitdepth_min_8)) );  \
    350            }                                                                        \
    351            const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
    352            *dst = iclip(*src + noise, min_value, max_value);
    353 
    354        for (int y = ystart; y < bh; y++) {
    355            // Non-overlapped image region (straightforward)
    356            for (int x = xstart; x < bw; x++) {
    357                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
    358                add_noise_uv(x, y, grain);
    359            }
    360 
    361            // Special case for overlapped column
    362            for (int x = 0; x < xstart; x++) {
    363                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
    364                int old   = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
    365                grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
    366                grain = iclip(grain, grain_min, grain_max);
    367                add_noise_uv(x, y, grain);
    368            }
    369        }
    370 
    371        for (int y = 0; y < ystart; y++) {
    372            // Special case for overlapped row (sans corner)
    373            for (int x = xstart; x < bw; x++) {
    374                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
    375                int old   = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
    376                grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5);
    377                grain = iclip(grain, grain_min, grain_max);
    378                add_noise_uv(x, y, grain);
    379            }
    380 
    381            // Special case for doubly-overlapped corner
    382            for (int x = 0; x < xstart; x++) {
    383                // Blend the top pixel with the top left block
    384                int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
    385                int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
    386                top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5);
    387                top = iclip(top, grain_min, grain_max);
    388 
    389                // Blend the current pixel with the left block
    390                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
    391                old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
    392                grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
    393                grain = iclip(grain, grain_min, grain_max);
    394 
    395                // Mix the row rows together and apply to image
    396                grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5);
    397                grain = iclip(grain, grain_min, grain_max);
    398                add_noise_uv(x, y, grain);
    399            }
    400        }
    401    }
    402 }
    403 
    404 #define fguv_ss_fn(nm, ss_x, ss_y) \
    405 static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
    406    fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \
    407                   row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \
    408                   HIGHBD_TAIL_SUFFIX); \
    409 }
    410 
    411 fguv_ss_fn(420, 1, 1);
    412 fguv_ss_fn(422, 1, 0);
    413 fguv_ss_fn(444, 0, 0);
    414 
    415 #if HAVE_ASM
    416 #if ARCH_AARCH64 || ARCH_ARM
    417 #include "src/arm/filmgrain.h"
    418 #elif ARCH_X86
    419 #include "src/x86/filmgrain.h"
    420 #endif
    421 #endif
    422 
    423 COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
    424    c->generate_grain_y = generate_grain_y_c;
    425    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
    426    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
    427    c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
    428 
    429    c->fgy_32x32xn = fgy_32x32xn_c;
    430    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
    431    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
    432    c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
    433 
    434 #if HAVE_ASM
    435 #if ARCH_AARCH64 || ARCH_ARM
    436    film_grain_dsp_init_arm(c);
    437 #elif ARCH_X86
    438    film_grain_dsp_init_x86(c);
    439 #endif
    440 #endif
    441 }