tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

itx_tmpl.c (11401B)


      1 /*
      2 * Copyright © 2018-2019, VideoLAN and dav1d authors
      3 * Copyright © 2018-2019, Two Orioles, LLC
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #include "config.h"
     29 
     30 #include <stddef.h>
     31 #include <stdint.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 
     35 #include "common/attributes.h"
     36 #include "common/intops.h"
     37 
     38 #include "src/itx.h"
     39 #include "src/itx_1d.h"
     40 #include "src/scan.h"
     41 #include "src/tables.h"
     42 
     43 static NOINLINE void
     44 inv_txfm_add_c(pixel *dst, const ptrdiff_t stride, coef *const coeff,
     45               const int eob, const /*enum RectTxfmSize*/ int tx, const int shift,
     46               const enum TxfmType txtp HIGHBD_DECL_SUFFIX)
     47 {
     48    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
     49    const int w = 4 * t_dim->w, h = 4 * t_dim->h;
     50    const int has_dconly = txtp == DCT_DCT;
     51    assert(w >= 4 && w <= 64);
     52    assert(h >= 4 && h <= 64);
     53    assert(eob >= 0);
     54 
     55    const int is_rect2 = w * 2 == h || h * 2 == w;
     56    const int rnd = (1 << shift) >> 1;
     57 
     58    if (eob < has_dconly) {
     59        int dc = coeff[0];
     60        coeff[0] = 0;
     61        if (is_rect2)
     62            dc = (dc * 181 + 128) >> 8;
     63        dc = (dc * 181 + 128) >> 8;
     64        dc = (dc + rnd) >> shift;
     65        dc = (dc * 181 + 128 + 2048) >> 12;
     66        for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
     67            for (int x = 0; x < w; x++)
     68                dst[x] = iclip_pixel(dst[x] + dc);
     69        return;
     70    }
     71 
     72    const uint8_t *const txtps = dav1d_tx1d_types[txtp];
     73    const itx_1d_fn first_1d_fn = dav1d_tx1d_fns[t_dim->lw][txtps[0]];
     74    const itx_1d_fn second_1d_fn = dav1d_tx1d_fns[t_dim->lh][txtps[1]];
     75    const int sh = imin(h, 32), sw = imin(w, 32);
     76 #if BITDEPTH == 8
     77    const int row_clip_min = INT16_MIN;
     78    const int col_clip_min = INT16_MIN;
     79 #else
     80    const int row_clip_min = (int) ((unsigned) ~bitdepth_max << 7);
     81    const int col_clip_min = (int) ((unsigned) ~bitdepth_max << 5);
     82 #endif
     83    const int row_clip_max = ~row_clip_min;
     84    const int col_clip_max = ~col_clip_min;
     85 
     86    int32_t tmp[64 * 64], *c = tmp;
     87    int last_nonzero_col; // in first 1d itx
     88    if (txtps[1] == IDENTITY && txtps[0] != IDENTITY) {
     89        last_nonzero_col = imin(sh - 1, eob);
     90    } else if (txtps[0] == IDENTITY && txtps[1] != IDENTITY) {
     91        last_nonzero_col = eob >> (t_dim->lw + 2);
     92    } else {
     93        last_nonzero_col = dav1d_last_nonzero_col_from_eob[tx][eob];
     94    }
     95    assert(last_nonzero_col < sh);
     96    for (int y = 0; y <= last_nonzero_col; y++, c += w) {
     97        if (is_rect2)
     98            for (int x = 0; x < sw; x++)
     99                c[x] = (coeff[y + x * sh] * 181 + 128) >> 8;
    100        else
    101            for (int x = 0; x < sw; x++)
    102                c[x] = coeff[y + x * sh];
    103        first_1d_fn(c, 1, row_clip_min, row_clip_max);
    104    }
    105    if (last_nonzero_col + 1 < sh)
    106        memset(c, 0, sizeof(*c) * (sh - last_nonzero_col - 1) * w);
    107 
    108    memset(coeff, 0, sizeof(*coeff) * sw * sh);
    109    for (int i = 0; i < w * sh; i++)
    110        tmp[i] = iclip((tmp[i] + rnd) >> shift, col_clip_min, col_clip_max);
    111 
    112    for (int x = 0; x < w; x++)
    113        second_1d_fn(&tmp[x], w, col_clip_min, col_clip_max);
    114 
    115    c = tmp;
    116    for (int y = 0; y < h; y++, dst += PXSTRIDE(stride))
    117        for (int x = 0; x < w; x++)
    118            dst[x] = iclip_pixel(dst[x] + ((*c++ + 8) >> 4));
    119 }
    120 
    /*
     * Defines the static wrapper inv_txfm_add_<t1>_<t2>_<bw>x<bh>_c(), which
     * forwards its arguments to inv_txfm_add_c() together with the block size
     * constant <pfx>TX_<bw>X<bh>, the inter-pass shift `rshift` and the
     * transform type enum value `txtp_id`.
     */
    #define inv_txfm_fn(t1, t2, txtp_id, pfx, bw, bh, rshift) \
    static void \
    inv_txfm_add_##t1##_##t2##_##bw##x##bh##_c(pixel *dst, const ptrdiff_t stride, \
                                               coef *const coeff, const int eob \
                                               HIGHBD_DECL_SUFFIX) \
    { \
        inv_txfm_add_c(dst, stride, coeff, eob, pfx##TX_##bw##X##bh, rshift, \
                       txtp_id HIGHBD_TAIL_SUFFIX); \
    }
    132 
    /*
     * Wrapper sets per block-size class. Each macro expands to the previous,
     * smaller set plus additional inv_txfm_fn() wrappers:
     *   fn64: DCT_DCT only
     *   fn32: fn64 + IDTX
     *   fn16: fn32 + the DCT/ADST/FLIPADST combinations, H_DCT and V_DCT
     *   fn84: fn16 + H_/V_ FLIPADST and ADST
     */
    #define inv_txfm_fn64(pfx, bw, bh, rshift) \
        inv_txfm_fn(dct, dct, DCT_DCT, pfx, bw, bh, rshift)

    #define inv_txfm_fn32(pfx, bw, bh, rshift) \
        inv_txfm_fn64(pfx, bw, bh, rshift) \
        inv_txfm_fn(identity, identity, IDTX, pfx, bw, bh, rshift)

    #define inv_txfm_fn16(pfx, bw, bh, rshift) \
        inv_txfm_fn32(pfx, bw, bh, rshift) \
        inv_txfm_fn(adst, dct, ADST_DCT, pfx, bw, bh, rshift) \
        inv_txfm_fn(dct, adst, DCT_ADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(adst, adst, ADST_ADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(dct, flipadst, DCT_FLIPADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(flipadst, dct, FLIPADST_DCT, pfx, bw, bh, rshift) \
        inv_txfm_fn(adst, flipadst, ADST_FLIPADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(flipadst, adst, FLIPADST_ADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(flipadst, flipadst, FLIPADST_FLIPADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(identity, dct, H_DCT, pfx, bw, bh, rshift) \
        inv_txfm_fn(dct, identity, V_DCT, pfx, bw, bh, rshift)

    #define inv_txfm_fn84(pfx, bw, bh, rshift) \
        inv_txfm_fn16(pfx, bw, bh, rshift) \
        inv_txfm_fn(identity, flipadst, H_FLIPADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(flipadst, identity, V_FLIPADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(identity, adst, H_ADST, pfx, bw, bh, rshift) \
        inv_txfm_fn(adst, identity, V_ADST, pfx, bw, bh, rshift)

    159 
    160 inv_txfm_fn84( ,  4,  4, 0)
    161 inv_txfm_fn84(R,  4,  8, 0)
    162 inv_txfm_fn84(R,  4, 16, 1)
    163 inv_txfm_fn84(R,  8,  4, 0)
    164 inv_txfm_fn84( ,  8,  8, 1)
    165 inv_txfm_fn84(R,  8, 16, 1)
    166 inv_txfm_fn32(R,  8, 32, 2)
    167 inv_txfm_fn84(R, 16,  4, 1)
    168 inv_txfm_fn84(R, 16,  8, 1)
    169 inv_txfm_fn16( , 16, 16, 2)
    170 inv_txfm_fn32(R, 16, 32, 1)
    171 inv_txfm_fn64(R, 16, 64, 2)
    172 inv_txfm_fn32(R, 32,  8, 2)
    173 inv_txfm_fn32(R, 32, 16, 1)
    174 inv_txfm_fn32( , 32, 32, 2)
    175 inv_txfm_fn64(R, 32, 64, 1)
    176 inv_txfm_fn64(R, 64, 16, 2)
    177 inv_txfm_fn64(R, 64, 32, 1)
    178 inv_txfm_fn64( , 64, 64, 2)
    179 
    180 #if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
    181  ARCH_AARCH64 || \
    182  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
    183 ))
    184 static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
    185                                       coef *const coeff, const int eob
    186                                       HIGHBD_DECL_SUFFIX)
    187 {
    188    int32_t tmp[4 * 4], *c = tmp;
    189    for (int y = 0; y < 4; y++, c += 4) {
    190        for (int x = 0; x < 4; x++)
    191            c[x] = coeff[y + x * 4] >> 2;
    192        dav1d_inv_wht4_1d_c(c, 1);
    193    }
    194    memset(coeff, 0, sizeof(*coeff) * 4 * 4);
    195 
    196    for (int x = 0; x < 4; x++)
    197        dav1d_inv_wht4_1d_c(&tmp[x], 4);
    198 
    199    c = tmp;
    200    for (int y = 0; y < 4; y++, dst += PXSTRIDE(stride))
    201        for (int x = 0; x < 4; x++)
    202            dst[x] = iclip_pixel(dst[x] + *c++);
    203 }
    204 #endif
    205 
    206 #if HAVE_ASM
    207 #if ARCH_AARCH64 || ARCH_ARM
    208 #include "src/arm/itx.h"
    209 #elif ARCH_LOONGARCH64
    210 #include "src/loongarch/itx.h"
    211 #elif ARCH_PPC64LE
    212 #include "src/ppc/itx.h"
    213 #elif ARCH_RISCV
    214 #include "src/riscv/itx.h"
    215 #elif ARCH_X86
    216 #include "src/x86/itx.h"
    217 #endif
    218 #endif
    219 
    220 COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
    221 #define assign_itx_all_fn64(w, h, pfx) \
    222    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
    223        inv_txfm_add_dct_dct_##w##x##h##_c
    224 
    225 #define assign_itx_all_fn32(w, h, pfx) \
    226    assign_itx_all_fn64(w, h, pfx); \
    227    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
    228        inv_txfm_add_identity_identity_##w##x##h##_c
    229 
    230 #define assign_itx_all_fn16(w, h, pfx) \
    231    assign_itx_all_fn32(w, h, pfx); \
    232    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
    233        inv_txfm_add_adst_dct_##w##x##h##_c; \
    234    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
    235        inv_txfm_add_dct_adst_##w##x##h##_c; \
    236    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
    237        inv_txfm_add_adst_adst_##w##x##h##_c; \
    238    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
    239        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
    240    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
    241        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
    242    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
    243        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
    244    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
    245        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
    246    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
    247        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
    248    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
    249        inv_txfm_add_dct_identity_##w##x##h##_c; \
    250    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
    251        inv_txfm_add_identity_dct_##w##x##h##_c
    252 
    253 #define assign_itx_all_fn84(w, h, pfx) \
    254    assign_itx_all_fn16(w, h, pfx); \
    255    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
    256        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
    257    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
    258        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
    259    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
    260        inv_txfm_add_adst_identity_##w##x##h##_c; \
    261    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
    262        inv_txfm_add_identity_adst_##w##x##h##_c; \
    263 
    264 #if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
    265  ARCH_AARCH64 || \
    266  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
    267 ))
    268    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
    269 #endif
    270    assign_itx_all_fn84( 4,  4, );
    271    assign_itx_all_fn84( 4,  8, R);
    272    assign_itx_all_fn84( 4, 16, R);
    273    assign_itx_all_fn84( 8,  4, R);
    274    assign_itx_all_fn84( 8,  8, );
    275    assign_itx_all_fn84( 8, 16, R);
    276    assign_itx_all_fn32( 8, 32, R);
    277    assign_itx_all_fn84(16,  4, R);
    278    assign_itx_all_fn84(16,  8, R);
    279    assign_itx_all_fn16(16, 16, );
    280    assign_itx_all_fn32(16, 32, R);
    281    assign_itx_all_fn64(16, 64, R);
    282    assign_itx_all_fn32(32,  8, R);
    283    assign_itx_all_fn32(32, 16, R);
    284    assign_itx_all_fn32(32, 32, );
    285    assign_itx_all_fn64(32, 64, R);
    286    assign_itx_all_fn64(64, 16, R);
    287    assign_itx_all_fn64(64, 32, R);
    288    assign_itx_all_fn64(64, 64, );
    289 
    290    int all_simd = 0;
    291 #if HAVE_ASM
    292 #if ARCH_AARCH64 || ARCH_ARM
    293    itx_dsp_init_arm(c, bpc, &all_simd);
    294 #endif
    295 #if ARCH_LOONGARCH64
    296    itx_dsp_init_loongarch(c, bpc);
    297 #endif
    298 #if ARCH_PPC64LE
    299    itx_dsp_init_ppc(c, bpc);
    300 #endif
    301 #if ARCH_RISCV
    302    itx_dsp_init_riscv(c, bpc);
    303 #endif
    304 #if ARCH_X86
    305    itx_dsp_init_x86(c, bpc, &all_simd);
    306 #endif
    307 #endif
    308 
    309    if (!all_simd)
    310        dav1d_init_last_nonzero_col_from_eob_tables();
    311 }