tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mc.h (10199B)


      1 /*
      2 * Copyright © 2018-2021, VideoLAN and dav1d authors
      3 * Copyright © 2018-2021, Two Orioles, LLC
      4 * All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions are met:
      8 *
      9 * 1. Redistributions of source code must retain the above copyright notice, this
     10 *    list of conditions and the following disclaimer.
     11 *
     12 * 2. Redistributions in binary form must reproduce the above copyright notice,
     13 *    this list of conditions and the following disclaimer in the documentation
     14 *    and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #include "src/cpu.h"
     29 #include "src/mc.h"
     30 
     31 #define decl_fn(type, name) \
     32    decl_##type##_fn(BF(name, ssse3)); \
     33    decl_##type##_fn(BF(name, avx2)); \
     34    decl_##type##_fn(BF(name, avx512icl));
        /* decl_fn(type, name), defined just above, pastes `type` into
         * decl_<type>_fn and invokes that macro three times, once per suffix
         * (ssse3, avx2, avx512icl), each time on BF(name, <suffix>). The expansion
         * already ends in ';', so the ';' written after each use is redundant.
         * NOTE(review): BF and the decl_<type>_fn macros are not defined in this
         * file; presumably they come from the headers included above -- confirm. */

        /* Each init_* helper below expands to a single assignment into a table of
         * `c` indexed by `type`, so a variable named `c` must be in scope wherever
         * one is used. init_mc_fn and init_mc_scaled_fn prepend dav1d_put_ to
         * `name`; init_mct_fn and init_mct_scaled_fn prepend dav1d_prep_. None of
         * them supplies the trailing ';' -- the caller writes it. */
     35 #define init_mc_fn(type, name, suffix) \
     36    c->mc[type] = BF(dav1d_put_##name, suffix)
     37 #define init_mct_fn(type, name, suffix) \
     38    c->mct[type] = BF(dav1d_prep_##name, suffix)
     39 #define init_mc_scaled_fn(type, name, suffix) \
     40    c->mc_scaled[type] = BF(dav1d_put_##name, suffix)
     41 #define init_mct_scaled_fn(type, name, suffix) \
     42    c->mct_scaled[type] = BF(dav1d_prep_##name, suffix)
     43 
     44 decl_8tap_fns(ssse3);
        /* NOTE(review): decl_8tap_fns is not defined in this file; presumably it
         * declares the unscaled 8-tap put/prep functions for one suffix, mirroring
         * the init_8tap_fns(suffix) calls in mc_dsp_init_x86 -- confirm where the
         * macro is defined. */
     45 decl_8tap_fns(avx2);
     46 decl_8tap_fns(avx512icl);
     47 
        /* Bilinear put/prep. Every decl_fn() use in this list declares the ssse3,
         * avx2 and avx512icl variants of the named function. */
     48 decl_fn(mc, dav1d_put_bilin);
     49 decl_fn(mct, dav1d_prep_bilin);
     50 
        /* Scaled put functions: nine 8-tap filter combinations plus bilinear. */
     51 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular);
     52 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular_smooth);
     53 decl_fn(mc_scaled, dav1d_put_8tap_scaled_regular_sharp);
     54 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth);
     55 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth_regular);
     56 decl_fn(mc_scaled, dav1d_put_8tap_scaled_smooth_sharp);
     57 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp);
     58 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp_regular);
     59 decl_fn(mc_scaled, dav1d_put_8tap_scaled_sharp_smooth);
     60 decl_fn(mc_scaled, dav1d_put_bilin_scaled);
     61 
        /* Scaled prep functions: same ten filters as the scaled put list. */
     62 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular);
     63 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular_smooth);
     64 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_regular_sharp);
     65 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth);
     66 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth_regular);
     67 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_smooth_sharp);
     68 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp);
     69 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp_regular);
     70 decl_fn(mct_scaled, dav1d_prep_8tap_scaled_sharp_smooth);
     71 decl_fn(mct_scaled, dav1d_prep_bilin_scaled);
     72 
        /* Averaging, masking and blending. The w_mask variants are declared here
         * in 420/422/444 order, while mc_dsp_init_x86 stores them in w_mask[0..2]
         * as 444/422/420. */
     73 decl_fn(avg, dav1d_avg);
     74 decl_fn(w_avg, dav1d_w_avg);
     75 decl_fn(mask, dav1d_mask);
     76 decl_fn(w_mask, dav1d_w_mask_420);
     77 decl_fn(w_mask, dav1d_w_mask_422);
     78 decl_fn(w_mask, dav1d_w_mask_444);
     79 decl_fn(blend, dav1d_blend);
     80 decl_fn(blend_dir, dav1d_blend_v);
     81 decl_fn(blend_dir, dav1d_blend_h);
     82 
        /* Warp functions get an sse4 declaration on top of the three suffixes
         * that decl_fn() provides; mc_dsp_init_x86 uses the sse4 versions only
         * when BITDEPTH == 8. */
     83 decl_fn(warp8x8, dav1d_warp_affine_8x8);
     84 decl_warp8x8_fn(BF(dav1d_warp_affine_8x8, sse4));
     85 decl_fn(warp8x8t, dav1d_warp_affine_8x8t);
     86 decl_warp8x8t_fn(BF(dav1d_warp_affine_8x8t, sse4));
     87 
     88 decl_fn(emu_edge, dav1d_emu_edge);
     89 
     90 decl_fn(resize, dav1d_resize);
     91 
     92 static ALWAYS_INLINE void mc_dsp_init_x86(Dav1dMCDSPContext *const c) {
           /* Installs x86 implementations into the function-pointer slots of `c`,
            * chosen from the flags returned by dav1d_get_cpu_flags().
            *
            * The body is a cascade of tiers: SSSE3, SSE4.1, AVX2, AVX512ICL. Each
            * tier first returns if its flag is missing, then overwrites slots set
            * by the previous tier, so statement order matters. A flag is only
            * looked at if every earlier check passed; e.g. the AVX2 test is never
            * reached when the SSE41 flag is clear. */
     93    const unsigned flags = dav1d_get_cpu_flags();
     94 
           /* Without SSSE3 this function leaves `c` untouched. */
     95    if(!(flags & DAV1D_X86_CPU_FLAG_SSSE3))
     96        return;
     97 
           /* ---- SSSE3 tier: fills every slot this function ever writes. ---- */
           /* NOTE(review): init_8tap_fns is defined outside this file; presumably
            * it fills the unscaled 8-tap entries of c->mc / c->mct -- confirm. */
     98    init_8tap_fns(ssse3);
     99 
    100    init_mc_fn(FILTER_2D_BILINEAR,             bilin,               ssse3);
    101    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               ssse3);
    102 
    103    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
    104    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
    105    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
    106    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
    107    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
    108    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
    109    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
    110    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
    111    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
    112    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
    113 
    114    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        ssse3);
    115    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, ssse3);
    116    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  ssse3);
    117    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, ssse3);
    118    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         ssse3);
    119    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   ssse3);
    120    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  ssse3);
    121    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   ssse3);
    122    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          ssse3);
    123    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               ssse3);
    124 
    125    c->avg = BF(dav1d_avg, ssse3);
    126    c->w_avg = BF(dav1d_w_avg, ssse3);
    127    c->mask = BF(dav1d_mask, ssse3);
           /* w_mask[] is filled in 444, 422, 420 order (same in every tier). */
    128    c->w_mask[0] = BF(dav1d_w_mask_444, ssse3);
    129    c->w_mask[1] = BF(dav1d_w_mask_422, ssse3);
    130    c->w_mask[2] = BF(dav1d_w_mask_420, ssse3);
    131    c->blend = BF(dav1d_blend, ssse3);
    132    c->blend_v = BF(dav1d_blend_v, ssse3);
    133    c->blend_h = BF(dav1d_blend_h, ssse3);
    134    c->warp8x8  = BF(dav1d_warp_affine_8x8, ssse3);
    135    c->warp8x8t = BF(dav1d_warp_affine_8x8t, ssse3);
    136    c->emu_edge = BF(dav1d_emu_edge, ssse3);
    137    c->resize = BF(dav1d_resize, ssse3);
    138 
           /* ---- SSE4.1 tier: only the two warp slots change, and only when
            * BITDEPTH == 8. The early return still gates all later tiers. ---- */
    139    if(!(flags & DAV1D_X86_CPU_FLAG_SSE41))
    140        return;
    141 
    142 #if BITDEPTH == 8
    143    c->warp8x8  = BF(dav1d_warp_affine_8x8, sse4);
    144    c->warp8x8t = BF(dav1d_warp_affine_8x8t, sse4);
    145 #endif
    146 
           /* The AVX2 and AVX512ICL tiers are compiled only when ARCH_X86_64 is
            * nonzero. */
    147 #if ARCH_X86_64
    148    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2))
    149        return;
    150 
           /* ---- AVX2 tier: overrides every slot the SSSE3 tier set. ---- */
    151    init_8tap_fns(avx2);
    152 
    153    init_mc_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
    154    init_mct_fn(FILTER_2D_BILINEAR,           bilin,               avx2);
    155 
    156    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
    157    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
    158    init_mc_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
    159    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
    160    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
    161    init_mc_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
    162    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
    163    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
    164    init_mc_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
    165    init_mc_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
    166 
    167    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR,        8tap_scaled_regular,        avx2);
    168    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_scaled_regular_smooth, avx2);
    169    init_mct_scaled_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_scaled_regular_sharp,  avx2);
    170    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_scaled_smooth_regular, avx2);
    171    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH,         8tap_scaled_smooth,         avx2);
    172    init_mct_scaled_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_scaled_smooth_sharp,   avx2);
    173    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_scaled_sharp_regular,  avx2);
    174    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_scaled_sharp_smooth,   avx2);
    175    init_mct_scaled_fn(FILTER_2D_8TAP_SHARP,          8tap_scaled_sharp,          avx2);
    176    init_mct_scaled_fn(FILTER_2D_BILINEAR,            bilin_scaled,               avx2);
    177 
    178    c->avg = BF(dav1d_avg, avx2);
    179    c->w_avg = BF(dav1d_w_avg, avx2);
    180    c->mask = BF(dav1d_mask, avx2);
    181    c->w_mask[0] = BF(dav1d_w_mask_444, avx2);
    182    c->w_mask[1] = BF(dav1d_w_mask_422, avx2);
    183    c->w_mask[2] = BF(dav1d_w_mask_420, avx2);
    184    c->blend = BF(dav1d_blend, avx2);
    185    c->blend_v = BF(dav1d_blend_v, avx2);
    186    c->blend_h = BF(dav1d_blend_h, avx2);
    187    c->warp8x8  = BF(dav1d_warp_affine_8x8, avx2);
    188    c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx2);
    189    c->emu_edge = BF(dav1d_emu_edge, avx2);
    190    c->resize = BF(dav1d_resize, avx2);
    191 
    192    if (!(flags & DAV1D_X86_CPU_FLAG_AVX512ICL))
    193        return;
    194 
           /* ---- AVX512ICL tier: a partial override. mc_scaled, mct_scaled and
            * emu_edge are not assigned here, so they keep their avx2 entries. ---- */
    195    init_8tap_fns(avx512icl);
    196 
    197    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx512icl);
    198    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx512icl);
    199 
    200    c->avg = BF(dav1d_avg, avx512icl);
    201    c->w_avg = BF(dav1d_w_avg, avx512icl);
    202    c->mask = BF(dav1d_mask, avx512icl);
    203    c->w_mask[0] = BF(dav1d_w_mask_444, avx512icl);
    204    c->w_mask[1] = BF(dav1d_w_mask_422, avx512icl);
    205    c->w_mask[2] = BF(dav1d_w_mask_420, avx512icl);
    206    c->blend = BF(dav1d_blend, avx512icl);
    207    c->blend_v = BF(dav1d_blend_v, avx512icl);
    208    c->blend_h = BF(dav1d_blend_h, avx512icl);
    209 
           /* resize and the warp slots switch to avx512icl only when the
            * SLOW_GATHER flag is clear; otherwise the avx2 entries stay in place.
            * NOTE(review): presumably these versions rely on gather
            * instructions -- confirm against the assembly. */
    210    if (!(flags & DAV1D_X86_CPU_FLAG_SLOW_GATHER)) {
    211        c->resize = BF(dav1d_resize, avx512icl);
    212        c->warp8x8  = BF(dav1d_warp_affine_8x8, avx512icl);
    213        c->warp8x8t = BF(dav1d_warp_affine_8x8t, avx512icl);
    214    }
    215 #endif
    216 }