tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

videodsp_init.c (9991B)


      1 /*
      2 * Copyright (C) 2002-2012 Michael Niedermayer
      3 * Copyright (C) 2012 Ronald S. Bultje
      4 *
      5 * This file is part of FFmpeg.
      6 *
      7 * FFmpeg is free software; you can redistribute it and/or
      8 * modify it under the terms of the GNU Lesser General Public
      9 * License as published by the Free Software Foundation; either
     10 * version 2.1 of the License, or (at your option) any later version.
     11 *
     12 * FFmpeg is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 * Lesser General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU Lesser General Public
     18 * License along with FFmpeg; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     20 */
     21 
     22 #include "config.h"
     23 #include "libavutil/attributes.h"
     24 #include "libavutil/avassert.h"
     25 #include "libavutil/common.h"
     26 #include "libavutil/cpu.h"
     27 #include "libavutil/x86/asm.h"
     28 #include "libavutil/x86/cpu.h"
     29 #include "libavcodec/videodsp.h"
     30 
     31 #if HAVE_X86ASM
     32 typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride,
     33                                const uint8_t *src, x86_reg src_stride,
     34                                x86_reg start_y, x86_reg end_y, x86_reg bh);
     35 typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride,
     36                                const uint8_t *src, x86_reg src_stride,
     37                                x86_reg start_y, x86_reg end_y, x86_reg bh,
     38                                x86_reg w);
     39 
     40 extern emu_edge_vfix_func ff_emu_edge_vfix1_sse2;
     41 extern emu_edge_vfix_func ff_emu_edge_vfix2_sse2;
     42 extern emu_edge_vfix_func ff_emu_edge_vfix3_sse2;
     43 extern emu_edge_vfix_func ff_emu_edge_vfix4_sse2;
     44 extern emu_edge_vfix_func ff_emu_edge_vfix5_sse2;
     45 extern emu_edge_vfix_func ff_emu_edge_vfix6_sse2;
     46 extern emu_edge_vfix_func ff_emu_edge_vfix7_sse2;
     47 extern emu_edge_vfix_func ff_emu_edge_vfix8_sse2;
     48 extern emu_edge_vfix_func ff_emu_edge_vfix9_sse2;
     49 extern emu_edge_vfix_func ff_emu_edge_vfix10_sse2;
     50 extern emu_edge_vfix_func ff_emu_edge_vfix11_sse2;
     51 extern emu_edge_vfix_func ff_emu_edge_vfix12_sse2;
     52 extern emu_edge_vfix_func ff_emu_edge_vfix13_sse2;
     53 extern emu_edge_vfix_func ff_emu_edge_vfix14_sse2;
     54 extern emu_edge_vfix_func ff_emu_edge_vfix15_sse2;
     55 extern emu_edge_vfix_func ff_emu_edge_vfix16_sse2;
     56 extern emu_edge_vfix_func ff_emu_edge_vfix17_sse2;
     57 extern emu_edge_vfix_func ff_emu_edge_vfix18_sse2;
     58 extern emu_edge_vfix_func ff_emu_edge_vfix19_sse2;
     59 extern emu_edge_vfix_func ff_emu_edge_vfix20_sse2;
     60 extern emu_edge_vfix_func ff_emu_edge_vfix21_sse2;
     61 extern emu_edge_vfix_func ff_emu_edge_vfix22_sse2;
     62 static emu_edge_vfix_func * const vfixtbl_sse2[22] = {
     63    ff_emu_edge_vfix1_sse2,  ff_emu_edge_vfix2_sse2,  ff_emu_edge_vfix3_sse2,
     64    ff_emu_edge_vfix4_sse2,  ff_emu_edge_vfix5_sse2,  ff_emu_edge_vfix6_sse2,
     65    ff_emu_edge_vfix7_sse2,  ff_emu_edge_vfix8_sse2,  ff_emu_edge_vfix9_sse2,
     66    ff_emu_edge_vfix10_sse2, ff_emu_edge_vfix11_sse2, ff_emu_edge_vfix12_sse2,
     67    ff_emu_edge_vfix13_sse2, ff_emu_edge_vfix14_sse2, ff_emu_edge_vfix15_sse2,
     68    ff_emu_edge_vfix16_sse2, ff_emu_edge_vfix17_sse2, ff_emu_edge_vfix18_sse2,
     69    ff_emu_edge_vfix19_sse2, ff_emu_edge_vfix20_sse2, ff_emu_edge_vfix21_sse2,
     70    ff_emu_edge_vfix22_sse2
     71 };
     72 extern emu_edge_vvar_func ff_emu_edge_vvar_sse;
     73 
     74 typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride,
     75                                x86_reg start_x, x86_reg bh);
     76 typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride,
     77                                x86_reg start_x, x86_reg n_words, x86_reg bh);
     78 
     79 extern emu_edge_hfix_func ff_emu_edge_hfix2_sse2;
     80 extern emu_edge_hfix_func ff_emu_edge_hfix4_sse2;
     81 extern emu_edge_hfix_func ff_emu_edge_hfix6_sse2;
     82 extern emu_edge_hfix_func ff_emu_edge_hfix8_sse2;
     83 extern emu_edge_hfix_func ff_emu_edge_hfix10_sse2;
     84 extern emu_edge_hfix_func ff_emu_edge_hfix12_sse2;
     85 extern emu_edge_hfix_func ff_emu_edge_hfix14_sse2;
     86 extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2;
     87 extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2;
     88 extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2;
     89 extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2;
     90 static emu_edge_hfix_func * const hfixtbl_sse2[11] = {
     91    ff_emu_edge_hfix2_sse2,  ff_emu_edge_hfix4_sse2,  ff_emu_edge_hfix6_sse2,
     92    ff_emu_edge_hfix8_sse2,  ff_emu_edge_hfix10_sse2, ff_emu_edge_hfix12_sse2,
     93    ff_emu_edge_hfix14_sse2, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2,
     94    ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
     95 };
     96 extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
     97 #if HAVE_AVX2_EXTERNAL
     98 extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2;
     99 extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2;
    100 extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2;
    101 extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2;
    102 extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2;
    103 extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2;
    104 extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2;
    105 extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2;
    106 static emu_edge_hfix_func * const hfixtbl_avx2[11] = {
    107    ff_emu_edge_hfix2_sse2,  ff_emu_edge_hfix4_sse2,  ff_emu_edge_hfix6_sse2,
    108    ff_emu_edge_hfix8_avx2,  ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2,
    109    ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
    110    ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2
    111 };
    112 extern emu_edge_hvar_func ff_emu_edge_hvar_avx2;
    113 #endif
    114 
    115 static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
    116                                              ptrdiff_t dst_stride,
    117                                              ptrdiff_t src_stride,
    118                                              x86_reg block_w, x86_reg block_h,
    119                                              x86_reg src_x, x86_reg src_y,
    120                                              x86_reg w, x86_reg h,
    121                                              emu_edge_vfix_func * const *vfix_tbl,
    122                                              emu_edge_vvar_func *v_extend_var,
    123                                              emu_edge_hfix_func * const *hfix_tbl,
    124                                              emu_edge_hvar_func *h_extend_var)
    125 {
    126    x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;
    127 
    128    if (!w || !h)
    129        return;
    130 
    131    av_assert2(block_w <= FFABS(dst_stride));
    132 
    133    if (src_y >= h) {
    134        src -= src_y*src_stride;
    135        src_y_add = h - 1;
    136        src_y     = h - 1;
    137    } else if (src_y <= -block_h) {
    138        src -= src_y*src_stride;
    139        src_y_add = 1 - block_h;
    140        src_y     = 1 - block_h;
    141    }
    142    if (src_x >= w) {
    143        src   += w - 1 - src_x;
    144        src_x  = w - 1;
    145    } else if (src_x <= -block_w) {
    146        src   += 1 - block_w - src_x;
    147        src_x  = 1 - block_w;
    148    }
    149 
    150    start_y = FFMAX(0, -src_y);
    151    start_x = FFMAX(0, -src_x);
    152    end_y   = FFMIN(block_h, h-src_y);
    153    end_x   = FFMIN(block_w, w-src_x);
    154    av_assert2(start_x < end_x && block_w > 0);
    155    av_assert2(start_y < end_y && block_h > 0);
    156 
    157    // fill in the to-be-copied part plus all above/below
    158    src += (src_y_add + start_y) * src_stride + start_x;
    159    w = end_x - start_x;
    160    if (w <= 22) {
    161        vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride,
    162                        start_y, end_y, block_h);
    163    } else {
    164        v_extend_var(dst + start_x, dst_stride, src, src_stride,
    165                     start_y, end_y, block_h, w);
    166    }
    167 
    168    // fill left
    169    if (start_x) {
    170        if (start_x <= 22) {
    171            hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h);
    172        } else {
    173            h_extend_var(dst, dst_stride,
    174                         start_x, (start_x + 1) >> 1, block_h);
    175        }
    176    }
    177 
    178    // fill right
    179    p = block_w - end_x;
    180    if (p) {
    181        if (p <= 22) {
    182            hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride,
    183                                   -!(p & 1), block_h);
    184        } else {
    185            h_extend_var(dst + end_x - (p & 1), dst_stride,
    186                         -!(p & 1), (p + 1) >> 1, block_h);
    187        }
    188    }
    189 }
    190 
    191 static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
    192                                              ptrdiff_t buf_stride,
    193                                              ptrdiff_t src_stride,
    194                                              int block_w, int block_h,
    195                                              int src_x, int src_y, int w,
    196                                              int h)
    197 {
    198    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
    199                     src_x, src_y, w, h, vfixtbl_sse2, &ff_emu_edge_vvar_sse,
    200                     hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
    201 }
    202 
    203 #if HAVE_AVX2_EXTERNAL
    204 static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src,
    205                                              ptrdiff_t buf_stride,
    206                                              ptrdiff_t src_stride,
    207                                              int block_w, int block_h,
    208                                              int src_x, int src_y, int w,
    209                                              int h)
    210 {
    211    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
    212                     src_x, src_y, w, h, vfixtbl_sse2, &ff_emu_edge_vvar_sse,
    213                     hfixtbl_avx2, &ff_emu_edge_hvar_avx2);
    214 }
    215 #endif /* HAVE_AVX2_EXTERNAL */
    216 #endif /* HAVE_X86ASM */
    217 
    218 void ff_prefetch_mmxext(const uint8_t *buf, ptrdiff_t stride, int h);
    219 
    220 av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
    221 {
    222 #if HAVE_X86ASM
    223    int cpu_flags = av_get_cpu_flags();
    224 
    225    if (EXTERNAL_MMXEXT(cpu_flags)) {
    226        ctx->prefetch = ff_prefetch_mmxext;
    227    }
    228    if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
    229        ctx->emulated_edge_mc = emulated_edge_mc_sse2;
    230    }
    231 #if HAVE_AVX2_EXTERNAL
    232    if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) {
    233        ctx->emulated_edge_mc = emulated_edge_mc_avx2;
    234    }
    235 #endif
    236 #endif /* HAVE_X86ASM */
    237 }