fft.c (8870B)
1 /* 2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "aom_dsp/aom_dsp_common.h" 13 #include "aom_dsp/fft_common.h" 14 #include "config/aom_dsp_rtcd.h" 15 16 static inline void simple_transpose(const float *A, float *B, int n) { 17 for (int y = 0; y < n; y++) { 18 for (int x = 0; x < n; x++) { 19 B[y * n + x] = A[x * n + y]; 20 } 21 } 22 } 23 24 // The 1d transform is real to complex and packs the complex results in 25 // a way to take advantage of conjugate symmetry (e.g., the n/2 + 1 real 26 // components, followed by the n/2 - 1 imaginary components). After the 27 // transform is done on the rows, the first n/2 + 1 columns are real, and 28 // the remaining are the imaginary components. After the transform on the 29 // columns, the region of [0, n/2]x[0, n/2] contains the real part of 30 // fft of the real columns. The real part of the 2d fft also includes the 31 // imaginary part of transformed imaginary columns. This function assembles 32 // the correct outputs while putting the real and imaginary components 33 // next to each other. 34 static inline void unpack_2d_output(const float *col_fft, float *output, 35 int n) { 36 for (int y = 0; y <= n / 2; ++y) { 37 const int y2 = y + n / 2; 38 const int y_extra = y2 > n / 2 && y2 < n; 39 40 for (int x = 0; x <= n / 2; ++x) { 41 const int x2 = x + n / 2; 42 const int x_extra = x2 > n / 2 && x2 < n; 43 output[2 * (y * n + x)] = 44 col_fft[y * n + x] - (x_extra && y_extra ? col_fft[y2 * n + x2] : 0); 45 output[2 * (y * n + x) + 1] = (y_extra ? col_fft[y2 * n + x] : 0) + 46 (x_extra ? col_fft[y * n + x2] : 0); 47 if (y_extra) { 48 output[2 * ((n - y) * n + x)] = 49 col_fft[y * n + x] + 50 (x_extra && y_extra ? col_fft[y2 * n + x2] : 0); 51 output[2 * ((n - y) * n + x) + 1] = 52 -(y_extra ? col_fft[y2 * n + x] : 0) + 53 (x_extra ? col_fft[y * n + x2] : 0); 54 } 55 } 56 } 57 } 58 59 void aom_fft_2d_gen(const float *input, float *temp, float *output, int n, 60 aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose, 61 aom_fft_unpack_func_t unpack, int vec_size) { 62 for (int x = 0; x < n; x += vec_size) { 63 tform(input + x, output + x, n); 64 } 65 transpose(output, temp, n); 66 67 for (int x = 0; x < n; x += vec_size) { 68 tform(temp + x, output + x, n); 69 } 70 transpose(output, temp, n); 71 72 unpack(temp, output, n); 73 } 74 75 static inline void store_float(float *output, float input) { *output = input; } 76 static inline float add_float(float a, float b) { return a + b; } 77 static inline float sub_float(float a, float b) { return a - b; } 78 static inline float mul_float(float a, float b) { return a * b; } 79 80 GEN_FFT_2(void, float, float, float, *, store_float) 81 GEN_FFT_4(void, float, float, float, *, store_float, (float), add_float, 82 sub_float) 83 GEN_FFT_8(void, float, float, float, *, store_float, (float), add_float, 84 sub_float, mul_float) 85 GEN_FFT_16(void, float, float, float, *, store_float, (float), add_float, 86 sub_float, mul_float) 87 GEN_FFT_32(void, float, float, float, *, store_float, (float), add_float, 88 sub_float, mul_float) 89 90 void aom_fft2x2_float_c(const float *input, float *temp, float *output) { 91 aom_fft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, simple_transpose, 92 unpack_2d_output, 1); 93 } 94 95 void aom_fft4x4_float_c(const float *input, float *temp, float *output) { 96 aom_fft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, simple_transpose, 97 unpack_2d_output, 1); 98 } 99 100 void aom_fft8x8_float_c(const float *input, float *temp, float *output) { 101 aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, simple_transpose, 102 unpack_2d_output, 1); 103 } 104 105 void aom_fft16x16_float_c(const float *input, float *temp, float *output) { 106 aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, simple_transpose, 107 unpack_2d_output, 1); 108 } 109 110 void aom_fft32x32_float_c(const float *input, float *temp, float *output) { 111 aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, simple_transpose, 112 unpack_2d_output, 1); 113 } 114 115 void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n, 116 aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi, 117 aom_fft_1d_func_t ifft_multi, 118 aom_fft_transpose_func_t transpose, int vec_size) { 119 // Column 0 and n/2 have conjugate symmetry, so we can directly do the ifft 120 // and get real outputs. 121 for (int y = 0; y <= n / 2; ++y) { 122 output[y * n] = input[2 * y * n]; 123 output[y * n + 1] = input[2 * (y * n + n / 2)]; 124 } 125 for (int y = n / 2 + 1; y < n; ++y) { 126 output[y * n] = input[2 * (y - n / 2) * n + 1]; 127 output[y * n + 1] = input[2 * ((y - n / 2) * n + n / 2) + 1]; 128 } 129 130 for (int i = 0; i < 2; i += vec_size) { 131 ifft_multi(output + i, temp + i, n); 132 } 133 134 // For the other columns, since we don't have a full ifft for complex inputs 135 // we have to split them into the real and imaginary counterparts. 136 // Pack the real component, then the imaginary components. 137 for (int y = 0; y < n; ++y) { 138 for (int x = 1; x < n / 2; ++x) { 139 output[y * n + (x + 1)] = input[2 * (y * n + x)]; 140 } 141 for (int x = 1; x < n / 2; ++x) { 142 output[y * n + (x + n / 2)] = input[2 * (y * n + x) + 1]; 143 } 144 } 145 for (int y = 2; y < vec_size; y++) { 146 fft_single(output + y, temp + y, n); 147 } 148 // This is the part that can be sped up with SIMD 149 for (int y = AOMMAX(2, vec_size); y < n; y += vec_size) { 150 fft_multi(output + y, temp + y, n); 151 } 152 153 // Put the 0 and n/2 th results in the correct place. 154 for (int x = 0; x < n; ++x) { 155 output[x] = temp[x * n]; 156 output[(n / 2) * n + x] = temp[x * n + 1]; 157 } 158 // This rearranges and transposes. 159 for (int y = 1; y < n / 2; ++y) { 160 // Fill in the real columns 161 for (int x = 0; x <= n / 2; ++x) { 162 output[x + y * n] = 163 temp[(y + 1) + x * n] + 164 ((x > 0 && x < n / 2) ? temp[(y + n / 2) + (x + n / 2) * n] : 0); 165 } 166 for (int x = n / 2 + 1; x < n; ++x) { 167 output[x + y * n] = temp[(y + 1) + (n - x) * n] - 168 temp[(y + n / 2) + ((n - x) + n / 2) * n]; 169 } 170 // Fill in the imag columns 171 for (int x = 0; x <= n / 2; ++x) { 172 output[x + (y + n / 2) * n] = 173 temp[(y + n / 2) + x * n] - 174 ((x > 0 && x < n / 2) ? temp[(y + 1) + (x + n / 2) * n] : 0); 175 } 176 for (int x = n / 2 + 1; x < n; ++x) { 177 output[x + (y + n / 2) * n] = temp[(y + 1) + ((n - x) + n / 2) * n] + 178 temp[(y + n / 2) + (n - x) * n]; 179 } 180 } 181 for (int y = 0; y < n; y += vec_size) { 182 ifft_multi(output + y, temp + y, n); 183 } 184 transpose(temp, output, n); 185 } 186 187 GEN_IFFT_2(static void, float, float, float, *, store_float) 188 GEN_IFFT_4(static void, float, float, float, *, store_float, (float), add_float, 189 sub_float) 190 GEN_IFFT_8(static void, float, float, float, *, store_float, (float), add_float, 191 sub_float, mul_float) 192 GEN_IFFT_16(static void, float, float, float, *, store_float, (float), 193 add_float, sub_float, mul_float) 194 GEN_IFFT_32(static void, float, float, float, *, store_float, (float), 195 add_float, sub_float, mul_float) 196 197 void aom_ifft2x2_float_c(const float *input, float *temp, float *output) { 198 aom_ifft_2d_gen(input, temp, output, 2, aom_fft1d_2_float, aom_fft1d_2_float, 199 aom_ifft1d_2_float, simple_transpose, 1); 200 } 201 202 void aom_ifft4x4_float_c(const float *input, float *temp, float *output) { 203 aom_ifft_2d_gen(input, temp, output, 4, aom_fft1d_4_float, aom_fft1d_4_float, 204 aom_ifft1d_4_float, simple_transpose, 1); 205 } 206 207 void aom_ifft8x8_float_c(const float *input, float *temp, float *output) { 208 aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_float, 209 aom_ifft1d_8_float, simple_transpose, 1); 210 } 211 212 void aom_ifft16x16_float_c(const float *input, float *temp, float *output) { 213 aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float, 214 aom_fft1d_16_float, aom_ifft1d_16_float, simple_transpose, 1); 215 } 216 217 void aom_ifft32x32_float_c(const float *input, float *temp, float *output) { 218 aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float, 219 aom_fft1d_32_float, aom_ifft1d_32_float, simple_transpose, 1); 220 }