fft_avx2.c (3380B)
1 /* 2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <immintrin.h> 13 14 #include "config/aom_dsp_rtcd.h" 15 #include "aom_dsp/aom_dsp_common.h" 16 #include "aom_dsp/fft_common.h" 17 18 extern void aom_transpose_float_sse2(const float *A, float *B, int n); 19 extern void aom_fft_unpack_2d_output_sse2(const float *col_fft, float *output, 20 int n); 21 22 // Generate the 1d forward transforms for float using _mm256 23 GEN_FFT_8(static inline void, avx2, float, __m256, _mm256_load_ps, 24 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 25 _mm256_mul_ps) 26 GEN_FFT_16(static inline void, avx2, float, __m256, _mm256_load_ps, 27 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 28 _mm256_mul_ps) 29 GEN_FFT_32(static inline void, avx2, float, __m256, _mm256_load_ps, 30 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 31 _mm256_mul_ps) 32 33 void aom_fft8x8_float_avx2(const float *input, float *temp, float *output) { 34 aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_avx2, 35 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8); 36 } 37 38 void aom_fft16x16_float_avx2(const float *input, float *temp, float *output) { 39 aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_avx2, 40 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8); 41 } 42 43 void aom_fft32x32_float_avx2(const float *input, float *temp, float *output) { 44 aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_avx2, 45 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8); 46 } 47 48 // Generate the 1d inverse transforms for float using _mm256 49 GEN_IFFT_8(static inline void, avx2, float, __m256, _mm256_load_ps, 50 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 51 _mm256_mul_ps) 52 GEN_IFFT_16(static inline void, avx2, float, __m256, _mm256_load_ps, 53 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 54 _mm256_mul_ps) 55 GEN_IFFT_32(static inline void, avx2, float, __m256, _mm256_load_ps, 56 _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps, 57 _mm256_mul_ps) 58 59 void aom_ifft8x8_float_avx2(const float *input, float *temp, float *output) { 60 aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_avx2, 61 aom_ifft1d_8_avx2, aom_transpose_float_sse2, 8); 62 } 63 64 void aom_ifft16x16_float_avx2(const float *input, float *temp, float *output) { 65 aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, 66 aom_fft1d_16_avx2, aom_ifft1d_16_avx2, 67 aom_transpose_float_sse2, 8); 68 } 69 70 void aom_ifft32x32_float_avx2(const float *input, float *temp, float *output) { 71 aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, 72 aom_fft1d_32_avx2, aom_ifft1d_32_avx2, 73 aom_transpose_float_sse2, 8); 74 }