float_dsp_init.c (5545B)
1 /* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19 #include "libavutil/attributes.h" 20 #include "libavutil/cpu.h" 21 #include "libavutil/float_dsp.h" 22 #include "cpu.h" 23 24 void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, 25 int len); 26 void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, 27 int len); 28 29 void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1, 30 int len); 31 void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1, 32 int len); 33 34 void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul, 35 int len); 36 void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, 37 int len); 38 void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul, 39 int len); 40 41 void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul, 42 int len); 43 44 void ff_vector_dmac_scalar_sse2(double *dst, const double *src, double mul, 45 int len); 46 void ff_vector_dmac_scalar_avx(double *dst, const double *src, double mul, 47 int len); 48 void ff_vector_dmac_scalar_fma3(double *dst, const double *src, double mul, 49 int len); 50 51 void ff_vector_dmul_scalar_sse2(double *dst, const double *src, 52 double mul, int len); 53 void ff_vector_dmul_scalar_avx(double *dst, const double *src, 54 double mul, int len); 55 56 void ff_vector_fmul_window_sse(float *dst, const float *src0, 57 const float *src1, const float *win, int len); 58 59 void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1, 60 const float *src2, int len); 61 void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1, 62 const float *src2, int len); 63 void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1, 64 const float *src2, int len); 65 66 void ff_vector_fmul_reverse_sse(float *dst, const float *src0, 67 const float *src1, int len); 68 void ff_vector_fmul_reverse_avx(float *dst, const float *src0, 69 const float *src1, int len); 70 void ff_vector_fmul_reverse_avx2(float *dst, const float *src0, 71 const float *src1, int len); 72 73 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); 74 float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order); 75 76 double ff_scalarproduct_double_sse2(const double *v1, const double *v2, size_t order); 77 double ff_scalarproduct_double_avx(const double *v1, const double *v2, size_t order); 78 79 void ff_butterflies_float_sse(float *restrict src0, float *restrict src1, int len); 80 81 av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) 82 { 83 int cpu_flags = av_get_cpu_flags(); 84 85 if (EXTERNAL_SSE(cpu_flags)) { 86 fdsp->vector_fmul = ff_vector_fmul_sse; 87 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; 88 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; 89 fdsp->vector_fmul_window = ff_vector_fmul_window_sse; 90 fdsp->vector_fmul_add = ff_vector_fmul_add_sse; 91 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; 92 fdsp->scalarproduct_float = ff_scalarproduct_float_sse; 93 fdsp->butterflies_float = ff_butterflies_float_sse; 94 } 95 if (EXTERNAL_SSE2(cpu_flags)) { 96 fdsp->vector_dmul = ff_vector_dmul_sse2; 97 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2; 98 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; 99 fdsp->scalarproduct_double = ff_scalarproduct_double_sse2; 100 } 101 if (EXTERNAL_AVX_FAST(cpu_flags)) { 102 fdsp->vector_fmul = ff_vector_fmul_avx; 103 fdsp->vector_dmul = ff_vector_dmul_avx; 104 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; 105 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx; 106 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx; 107 fdsp->vector_fmul_add = ff_vector_fmul_add_avx; 108 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx; 109 fdsp->scalarproduct_double = ff_scalarproduct_double_avx; 110 } 111 if (EXTERNAL_AVX2_FAST(cpu_flags)) { 112 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2; 113 } 114 if (EXTERNAL_FMA3_FAST(cpu_flags)) { 115 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; 116 fdsp->vector_fmul_add = ff_vector_fmul_add_fma3; 117 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3; 118 fdsp->scalarproduct_float = ff_scalarproduct_float_fma3; 119 } 120 }