tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

vp9dsp_init_16bpp.c (5526B)


      1 /*
      2 * VP9 SIMD optimizations
      3 *
      4 * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
      5 *
      6 * This file is part of FFmpeg.
      7 *
      8 * FFmpeg is free software; you can redistribute it and/or
      9 * modify it under the terms of the GNU Lesser General Public
     10 * License as published by the Free Software Foundation; either
     11 * version 2.1 of the License, or (at your option) any later version.
     12 *
     13 * FFmpeg is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16 * Lesser General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU Lesser General Public
     19 * License along with FFmpeg; if not, write to the Free Software
     20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     21 */
     22 
     23 #include "libavutil/attributes.h"
     24 #include "libavutil/cpu.h"
     25 #include "libavutil/x86/cpu.h"
     26 #include "libavcodec/vp9dsp.h"
     27 #include "libavcodec/x86/vp9dsp_init.h"
     28 
     29 #if HAVE_X86ASM
     30 
     /*
      * Invocations of decl_fpel_func, one per (operation, size, instruction-set
      * suffix) combination that the init function below refers to.
      *
      * The macro itself is not defined in this file. NOTE(review): it presumably
      * comes from libavcodec/x86/vp9dsp_init.h and expands to a prototype for an
      * external assembly routine -- confirm against that header.
      *
      * Visible pattern: every "put" entry passes an empty third argument, while
      * every "avg" entry passes _16.
      */
     31 decl_fpel_func(put,   8,    , mmx);
     32 decl_fpel_func(avg,   8, _16, mmxext);
     33 decl_fpel_func(put,  16,    , sse);
     34 decl_fpel_func(put,  32,    , sse);
     35 decl_fpel_func(put,  64,    , sse);
     36 decl_fpel_func(put, 128,    , sse);
     37 decl_fpel_func(avg,  16, _16, sse2);
     38 decl_fpel_func(avg,  32, _16, sse2);
     39 decl_fpel_func(avg,  64, _16, sse2);
     40 decl_fpel_func(avg, 128, _16, sse2);
     41 decl_fpel_func(put,  32,    , avx);
     42 decl_fpel_func(put,  64,    , avx);
     43 decl_fpel_func(put, 128,    , avx);
     44 decl_fpel_func(avg,  32, _16, avx2);
     45 decl_fpel_func(avg,  64, _16, avx2);
     46 decl_fpel_func(avg, 128, _16, avx2);
     47 
     /*
      * Invocations of decl_ipred_fns / decl_ipred_fn for the intra-prediction
      * routines named v, h, dc, dc_top, dc_left, dl, dr, vl and hd.
      *
      * decl_ipred_fns is given two instruction-set suffixes. NOTE(review): judging
      * by the init function below (size 4 installed with the first suffix, sizes
      * 8/16/32 with the second), the first suffix presumably applies to size 4
      * and the second to the larger sizes -- confirm against the macro definition,
      * which is not in this file.
      *
      * decl_ipred_fn is given an explicit size and a single suffix; it is used
      * here only for the avx2 variants. The size-32 dr avx2 entry is declared
      * unconditionally even though the init function installs it only when
      * ARCH_X86_64 is set.
      */
     48 decl_ipred_fns(v,       16, mmx,    sse);
     49 decl_ipred_fns(h,       16, mmxext, sse2);
     50 decl_ipred_fns(dc,      16, mmxext, sse2);
     51 decl_ipred_fns(dc_top,  16, mmxext, sse2);
     52 decl_ipred_fns(dc_left, 16, mmxext, sse2);
     53 decl_ipred_fn(dl,       16,     16, avx2);
     54 decl_ipred_fn(dl,       32,     16, avx2);
     55 decl_ipred_fn(dr,       16,     16, avx2);
     56 decl_ipred_fn(dr,       32,     16, avx2);
     57 decl_ipred_fn(vl,       16,     16, avx2);
     58 decl_ipred_fn(hd,       16,     16, avx2);
     59 
     /*
      * decl_ipred_dir_funcs(type): expands to three decl_ipred_fns invocations
      * for the given predictor name, one each for the sse2, ssse3 and avx
      * suffixes. The same suffix is passed in both suffix positions, and 16 is
      * passed as the second argument every time.
      *
      * The last invocation has no trailing semicolon; the caller supplies it.
      */
     60 #define decl_ipred_dir_funcs(type) \
     61 decl_ipred_fns(type, 16, sse2,  sse2); \
     62 decl_ipred_fns(type, 16, ssse3, ssse3); \
     63 decl_ipred_fns(type, 16, avx,   avx)
     64 
     /*
      * Apply decl_ipred_dir_funcs to the six predictor names dl, dr, vl, vr, hu
      * and hd. These are the same six names that the init function installs via
      * init_ipred_funcs in its SSE2, SSSE3 and AVX branches.
      */
     65 decl_ipred_dir_funcs(dl);
     66 decl_ipred_dir_funcs(dr);
     67 decl_ipred_dir_funcs(vl);
     68 decl_ipred_dir_funcs(vr);
     69 decl_ipred_dir_funcs(hu);
     70 decl_ipred_dir_funcs(hd);
     71 #endif /* HAVE_X86ASM */
     72 
     /**
      * Select x86 SIMD routines for the VP9 DSP context according to the CPU
      * flags reported by av_get_cpu_flags().
      *
      * When HAVE_X86ASM is false the function body is empty and nothing happens.
      *
      * @param dsp  VP9 DSP context. It is not referenced directly in this body;
      *             NOTE(review): presumably the init_* macros write function
      *             pointers into it -- confirm in libavcodec/x86/vp9dsp_init.h.
      */
     73 av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
     74 {
     75 #if HAVE_X86ASM
     76    int cpu_flags = av_get_cpu_flags();
     77 
           /*
            * The feature checks below run in the fixed order MMX, MMXEXT, SSE,
            * SSE2, SSSE3, AVX, AVX2, and each one is an independent "if" (not an
            * else-chain), so every branch whose check passes is executed.
            * NOTE(review): presumably a later branch overwrites entries set by an
            * earlier one, which would make this order significant -- confirm
            * against the init_* macro definitions before reordering.
            */
     78    if (EXTERNAL_MMX(cpu_flags)) {
     79        init_fpel_func(4, 0,   8, put, , mmx);
     80        init_ipred_func(v, VERT, 4, 16, mmx);
     81    }
     82 
     83    if (EXTERNAL_MMXEXT(cpu_flags)) {
     84        init_fpel_func(4, 1,   8, avg, _16, mmxext);
     85        init_ipred_func(h, HOR, 4, 16, mmxext);
     86        init_ipred_func(dc, DC, 4, 16, mmxext);
     87        init_ipred_func(dc_top,  TOP_DC,  4, 16, mmxext);
     88        init_ipred_func(dc_left, LEFT_DC, 4, 16, mmxext);
     89    }
     90 
     91    if (EXTERNAL_SSE(cpu_flags)) {
     92        init_fpel_func(3, 0,  16, put, , sse);
     93        init_fpel_func(2, 0,  32, put, , sse);
     94        init_fpel_func(1, 0,  64, put, , sse);
     95        init_fpel_func(0, 0, 128, put, , sse);
     96        init_8_16_32_ipred_funcs(v, VERT, 16, sse);
     97    }
     98 
     99    if (EXTERNAL_SSE2(cpu_flags)) {
    100        init_fpel_func(3, 1,  16, avg, _16, sse2);
    101        init_fpel_func(2, 1,  32, avg, _16, sse2);
    102        init_fpel_func(1, 1,  64, avg, _16, sse2);
    103        init_fpel_func(0, 1, 128, avg, _16, sse2);
    104        init_8_16_32_ipred_funcs(h, HOR, 16, sse2);
    105        init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
    106        init_8_16_32_ipred_funcs(dc_top,  TOP_DC,  16, sse2);
    107        init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
    108        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
    109        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
    110        init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
    111        init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
    112        init_ipred_funcs(hu, HOR_UP, 16, sse2);
    113        init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
    114    }
    115 
           /* SSSE3 and AVX repeat the same six directional predictors as SSE2. */
    116    if (EXTERNAL_SSSE3(cpu_flags)) {
    117        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
    118        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
    119        init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
    120        init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
    121        init_ipred_funcs(hu, HOR_UP, 16, ssse3);
    122        init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
    123    }
    124 
    125    if (EXTERNAL_AVX_FAST(cpu_flags)) {
    126        init_fpel_func(2, 0,  32, put, , avx);
    127        init_fpel_func(1, 0,  64, put, , avx);
    128        init_fpel_func(0, 0, 128, put, , avx);
    129        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
    130        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
    131        init_ipred_funcs(vl, VERT_LEFT, 16, avx);
    132        init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
    133        init_ipred_funcs(hu, HOR_UP, 16, avx);
    134        init_ipred_funcs(hd, HOR_DOWN, 16, avx);
    135    }
    136 
    137    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
    138        init_fpel_func(2, 1,  32, avg, _16, avx2);
    139        init_fpel_func(1, 1,  64, avg, _16, avx2);
    140        init_fpel_func(0, 1, 128, avg, _16, avx2);
    141        init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
    142        init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
    143        init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
    144        init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
    145        init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
               /*
                * The size-32 dr avx2 entry is the only one in this function
                * restricted to ARCH_X86_64 builds. The reason is not visible
                * here. NOTE(review): possibly the routine needs registers that
                * only exist in 64-bit mode -- confirm in the assembly source.
                */
    146 #if ARCH_X86_64
    147        init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
    148 #endif
    149    }
    150 
    151 #endif /* HAVE_X86ASM */
    152 }